From mamidala at mvapich.cse.ohio-state.edu Tue Jan 1 21:06:25 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Tue Jan 1 21:06:33 2008 Subject: [mvapich-commit] r1752 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801020206.m0226PXf003128@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-01 21:06:23 -0500 (Tue, 01 Jan 2008) New Revision: 1752 Modified: mvapich/trunk/mpid/ch_gen2/coll_shmem.h mvapich/trunk/mpid/ch_gen2/shmem_coll.c Log: checking in the modifications necessary to make the compile time parameters for shared memory collectives run time. Modified: mvapich/trunk/mpid/ch_gen2/coll_shmem.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/coll_shmem.h 2007-12-31 19:08:49 UTC (rev 1751) +++ mvapich/trunk/mpid/ch_gen2/coll_shmem.h 2008-01-02 02:06:23 UTC (rev 1752) @@ -91,21 +91,17 @@ #define SHMEM_BCAST_SIZE (1<<23) #define SHMEM_COLL_BLOCK_SIZE ((smpi.num_local_nodes) * shmem_coll_max_msg_size) #define SHMEM_COLL_OFFSET (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE) -#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + SHMEM_BCAST_SIZE) +#define FLAG_TYPES 6 +#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + sizeof(shmem_coll_region) \ + + FLAG_TYPES*shmem_coll_num_comm*smpi.num_local_nodes) /* the shared area itself */ typedef struct { - volatile int child_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int child_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; + volatile int shmem_comm_count; pthread_spinlock_t 
shmem_coll_lock; /* the collective buffer */ - char shmem_coll_buf; }shmem_coll_region; @@ -113,6 +109,13 @@ struct shmem_coll_mgmt{ void *mmap_ptr; int fd; + int **child_complete_bcast; + int **root_complete_bcast; + int **child_complete_gather; + int **root_complete_gather; + int **barrier_gather; + int **barrier_bcast; + char* shmem_coll_buf; }; #define SHMEM_BCAST_FLAGS 1024 Modified: mvapich/trunk/mpid/ch_gen2/shmem_coll.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/shmem_coll.c 2007-12-31 19:08:49 UTC (rev 1751) +++ mvapich/trunk/mpid/ch_gen2/shmem_coll.c 2008-01-02 02:06:23 UTC (rev 1752) @@ -57,7 +57,7 @@ extern int shmem_coll_blocks; extern int shmem_coll_max_msg_size; - +int shmem_coll_num_comm; #undef FUNCNAME #define FUNCNAME MPIDI_CH3I_SHMEM_COLL_Init #undef FCNAME @@ -69,6 +69,7 @@ volatile char tmpchar; #endif + shmem_coll_num_comm = shmem_coll_blocks; /* add pid for unique file name */ shmem_file = (char *) malloc(sizeof(char) * (SHMEM_COLL_HOSTNAME_LEN + 26 + PID_CHAR_LEN)); @@ -155,20 +156,59 @@ return -1; } shmem_coll = (shmem_coll_region *) shmem_coll_obj.mmap_ptr; + + + shmem_coll_obj.child_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.child_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + + shmem_coll_obj.child_complete_bcast[0] = (int*)((char*)(shmem_coll_obj.mmap_ptr) + sizeof(shmem_coll)); + shmem_coll_obj.root_complete_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.child_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 
2*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.root_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 3*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 4*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 5*shmem_coll_num_comm*smpi.num_local_nodes); + + for (j=1;jchild_complete_bcast[j][i] = 1; + shmem_coll_obj.child_complete_bcast[j][i] = 1; } for (i = 0; i < SHMEM_COLL_NUM_PROCS; i++){ - shmem_coll->root_complete_gather[j][i] = 1; + shmem_coll_obj.root_complete_gather[j][i] = 1; } } pthread_spin_init(&shmem_coll->shmem_coll_lock,0); } + shmem_coll_obj.shmem_coll_buf = + (char*)(shmem_coll_obj.child_complete_bcast[0] + 6*shmem_coll_num_comm*smpi.num_local_nodes); + return MPI_SUCCESS; } @@ -201,30 +241,30 @@ void MPID_SHMEM_COLL_GetShmemBuf(int size, int rank, int shmem_comm_rank, void** output_buf) { int i,myid; - char* shmem_coll_buf = (char*)(&(shmem_coll->shmem_coll_buf)); + char* shmem_coll_buf = shmem_coll_obj.shmem_coll_buf; myid = rank; if (myid == 0){ for (i=1; i < size; i++){ - while (shmem_coll->child_complete_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; } /* Set the completion flags back to zero */ for (i=1; i < size; i++){ - shmem_coll->child_complete_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] = 0; } *output_buf = (char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } else{ - while (shmem_coll->root_complete_gather[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; - shmem_coll->root_complete_gather[shmem_comm_rank][myid] = 0; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] = 0; *output_buf = 
(char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } } @@ -288,11 +328,11 @@ if (myid == 0){ for (i=1; i < size; i++){ - shmem_coll->root_complete_gather[shmem_comm_rank][i] = 1; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][i] = 1; } } else{ - shmem_coll->child_complete_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][myid] = 1; } } @@ -303,17 +343,17 @@ if (rank == 0){ for (i=1; i < size; i++){ - while (shmem_coll->barrier_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.barrier_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } } for (i=1; i < size; i++){ - shmem_coll->barrier_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.barrier_gather[shmem_comm_rank][i] = 0; } } else{ - shmem_coll->barrier_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.barrier_gather[shmem_comm_rank][myid] = 1; } } @@ -324,15 +364,15 @@ if (rank == 0){ for (i=1; i < size; i++){ - shmem_coll->barrier_bcast[shmem_comm_rank][i] = 1; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][i] = 1; } } else{ - while (shmem_coll->barrier_bcast[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } - shmem_coll->barrier_bcast[shmem_comm_rank][myid] = 0; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] = 0; } MPID_DeviceCheck(MPID_NOTBLOCKING); } From mamidala at mvapich.cse.ohio-state.edu Thu Jan 3 11:21:32 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 11:21:40 2008 Subject: [mvapich-commit] r1754 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801031621.m03GLWBp007980@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-03 11:21:31 -0500 (Thu, 03 Jan 2008) New Revision: 1754 Modified: mvapich/trunk/mpid/ch_gen2/cm.c Log: checking in the changes sent by Pasha from Mellanox Modified: mvapich/trunk/mpid/ch_gen2/cm.c 
=================================================================== --- mvapich/trunk/mpid/ch_gen2/cm.c 2008-01-02 03:56:14 UTC (rev 1753) +++ mvapich/trunk/mpid/ch_gen2/cm.c 2008-01-03 16:21:31 UTC (rev 1754) @@ -883,7 +883,7 @@ memset(&attr, 0, sizeof(struct ibv_qp_attr)); attr.qp_state = IBV_QPS_INIT; - attr.port_num = viadev_default_port; + attr.port_num = viadev_default_port; set_pkey_index(&attr.pkey_index,viadev_default_port); attr.qkey = 0; @@ -898,8 +898,8 @@ } { struct ibv_qp_attr attr; + memset(&attr, 0, sizeof(struct ibv_qp_attr)); - attr.qp_state = IBV_QPS_RTR; if (ibv_modify_qp(cm_ud_qp, &attr, IBV_QP_STATE)) { CM_ERR("Failed to modify QP to RTR"); @@ -966,7 +966,7 @@ memset(&ah_attr, 0, sizeof(ah_attr)); ah_attr.is_global = 0; ah_attr.dlid = cm_lid[i]; - ah_attr.sl = 0; + ah_attr.sl = viadev_default_service_level; ah_attr.src_path_bits = 0; ah_attr.port_num = viadev_default_port; cm_ah[i] = ibv_create_ah(viadev.ptag, &ah_attr); From mamidala at mvapich.cse.ohio-state.edu Thu Jan 3 11:31:36 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 11:31:42 2008 Subject: [mvapich-commit] r1755 - mvapich/trunk/src/coll Message-ID: <200801031631.m03GVa05007999@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-03 11:31:35 -0500 (Thu, 03 Jan 2008) New Revision: 1755 Modified: mvapich/trunk/src/coll/intra_fns_new.c Log: checking in minor fixes to Alltoall, thanks to Pasha from Mellanox Modified: mvapich/trunk/src/coll/intra_fns_new.c =================================================================== --- mvapich/trunk/src/coll/intra_fns_new.c 2008-01-03 16:21:31 UTC (rev 1754) +++ mvapich/trunk/src/coll/intra_fns_new.c 2008-01-03 16:31:35 UTC (rev 1755) @@ -31,8 +31,6 @@ #define MPIR_BCAST_SHORT_MSG 12288 #define MPIR_BCAST_LONG_MSG 524288 #define MPIR_BCAST_MIN_PROCS 8 -#define MPIR_ALLTOALL_SHORT_MSG 256 -#define MPIR_ALLTOALL_MEDIUM_MSG 32768 #define MPIR_ALLGATHER_SHORT_MSG 81920 #define 
MPIR_ALLGATHER_LONG_MSG 524288 #define MPIR_REDUCE_SHORT_MSG 2048 From mamidala at mvapich.cse.ohio-state.edu Thu Jan 3 11:34:08 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 11:34:14 2008 Subject: [mvapich-commit] r1756 - mvapich/trunk/src/env Message-ID: <200801031634.m03GY8Er008010@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-03 11:34:07 -0500 (Thu, 03 Jan 2008) New Revision: 1756 Modified: mvapich/trunk/src/env/initutil.c Log: checking in minor fixes to Alltoall code flow, thanks to Pasha from Mellanox for the patch Modified: mvapich/trunk/src/env/initutil.c =================================================================== --- mvapich/trunk/src/env/initutil.c 2008-01-03 16:31:35 UTC (rev 1755) +++ mvapich/trunk/src/env/initutil.c 2008-01-03 16:34:07 UTC (rev 1756) @@ -283,35 +283,35 @@ exit(0); } #endif - if ((value = getenv("VIADEV_BCAST_KNOMIAL")) != NULL){ + if ((value = getenv("VIADEV_BCAST_KNOMIAL")) != NULL){ bcast_knomial_degree = atoi(value); if(bcast_knomial_degree<2) - bcast_knomial_degree = 2; + bcast_knomial_degree = 2; } - if ((value = getenv("MPIR_ALLTOALL_SHORT_MSG")) != NULL) { - mpir_alltoall_short_msg = atoi(value); - } + if ((value = getenv("MPIR_ALLTOALL_SHORT_MSG")) != NULL) { + mpir_alltoall_short_msg = atoi(value); + } - if ((value = getenv("MPIR_ALLTOALL_MEDIUM_MSG")) != NULL) { - mpir_alltoall_medium_msg = atoi(value); - } + if ((value = getenv("MPIR_ALLTOALL_MEDIUM_MSG")) != NULL) { + mpir_alltoall_medium_msg = atoi(value); + } #ifndef CH_PSM - if ((value = getenv("All_to_all_basic")) != NULL) { - if (atoi(value) > 0){ + if ((value = getenv("MPIR_ALLTOALL_BASIC")) != NULL) { + if (atoi(value) > 0){ enable_alltoall_basic = 1; - mpir_alltoall_short_msg = 256; + mpir_alltoall_short_msg = 256; mpir_alltoall_medium_msg = 32768; - } - } + } + } - if ((value = getenv("All_to_all_mcore_opt")) != NULL) { - if (atoi(value) > 0){ + if ((value = 
getenv("MPIR_ALLTOALL_MCORE_OPT")) != NULL) { + if (atoi(value) > 0){ mpir_alltoall_short_msg = 8192; mpir_alltoall_medium_msg = 8192; - } - } + } + } #endif /* If we wanted to be able to check if we're being debugged, From mamidala at mvapich.cse.ohio-state.edu Thu Jan 3 11:35:03 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 11:35:09 2008 Subject: [mvapich-commit] r1757 - mvapich/trunk Message-ID: <200801031635.m03GZ3CF008022@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-03 11:35:02 -0500 (Thu, 03 Jan 2008) New Revision: 1757 Modified: mvapich/trunk/mvapich.conf Log: checking in the new parameters Modified: mvapich/trunk/mvapich.conf =================================================================== --- mvapich/trunk/mvapich.conf 2008-01-03 16:34:07 UTC (rev 1756) +++ mvapich/trunk/mvapich.conf 2008-01-03 16:35:02 UTC (rev 1757) @@ -486,14 +486,14 @@ # only the "small" and "big" message size algorithms are used. The medium message size # algorithm is disabled. #----------------------------------------------------------------------------------------- -# All_to_all_basic=0 +# MPIR_ALLTOALL_BASIC=0 # # Default value: 0 # Turning this option sets the MPIR_ALLTOALL_SHORT_MSG to 256 and MPIR_ALLTOALL_MEDIUM_MSG # to 32768. This setting is for dual node clusters. This parameter is not # present for PSM Device. #----------------------------------------------------------------------------------------- -# All_to_all_mcore_opt=1 +# MPIR_ALLTOALL_MCORE_OPT=1 # # Default value:1 # Turning this option sets the MPIR_ALLTOALL_SHORT_MSG to 8192 and MPIR_ALLTOALL_MEDIUM_MSG @@ -589,7 +589,15 @@ # InfiniBand layer used by MVAPICH communication. 
# # VIADEV_DEFAULT_PSN -# VIADEV_DEFAULT_SERVICE_LEVEL +#----------------------------------------------------------------------------------------- +# VIADEV_DEFAULT_SERVICE_LEVEL=0 +# +# Default value : 0 +# One can manage execution priorities of IB application using Virtual Lane parameter (VL) +# Sending SL parameter for QP that should be translated into appropriate VL number when +# sending data. +#----------------------------------------------------------------------------------------- + # VIADEV_DEFAULT_STATIC_RATE # VIADEV_DEFAULT_SRC_PATH_BITS # VIADEV_DEFAULT_MAX_SG_LIST From perkinjo at mvapich.cse.ohio-state.edu Thu Jan 3 16:37:18 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 16:37:48 2008 Subject: [mvapich-commit] r1760 - mvapich/trunk/osu_benchmarks Message-ID: <200801032137.m03LbIoO008477@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-03 16:37:17 -0500 (Thu, 03 Jan 2008) New Revision: 1760 Modified: mvapich/trunk/osu_benchmarks/makefile Log: Build alltoall with MPI-1 benchmarks. 
Modified: mvapich/trunk/osu_benchmarks/makefile =================================================================== --- mvapich/trunk/osu_benchmarks/makefile 2008-01-03 19:25:29 UTC (rev 1759) +++ mvapich/trunk/osu_benchmarks/makefile 2008-01-03 21:37:17 UTC (rev 1760) @@ -1,5 +1,5 @@ CC = mpicc -MPI1_BENCHMARKS = osu_bcast osu_bibw osu_bw osu_latency osu_mbw_mr +MPI1_BENCHMARKS = osu_alltoall osu_bcast osu_bibw osu_bw osu_latency osu_mbw_mr MPI2_BENCHMARKS = osu_acc_latency osu_get_bw osu_get_latency osu_latency_mt\ osu_put_bibw osu_put_bw osu_put_latency ALL_BENCHMARKS = ${MPI1_BENCHMARKS} ${MPI2_BENCHMARKS} From perkinjo at mvapich.cse.ohio-state.edu Thu Jan 3 14:25:30 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Thu Jan 3 20:26:09 2008 Subject: [mvapich-commit] r1759 - mvapich/trunk/mpid/ch_gen2/process Message-ID: <200801031925.m03JPUBk008281@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-03 14:25:29 -0500 (Thu, 03 Jan 2008) New Revision: 1759 Modified: mvapich/trunk/mpid/ch_gen2/process/minidaemon.c mvapich/trunk/mpid/ch_gen2/process/minidaemon.h mvapich/trunk/mpid/ch_gen2/process/minidaemon_client.c mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.h mvapich/trunk/mpid/ch_gen2/process/mpispawn.c Log: ssh/rsh fix for XLauncher. Fixes for other integration problems as well. 
Modified: mvapich/trunk/mpid/ch_gen2/process/minidaemon.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/minidaemon.c 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/minidaemon.c 2008-01-03 19:25:29 UTC (rev 1759) @@ -73,7 +73,7 @@ /* Minidaemon basic types definition */ typedef enum {CH_JOB_EXT = -1,P_OK = 0, P_KILLED, STATUS_REQUEST, STATUS , KILL} MD_message; typedef enum {MD_INIT = 0, MD_QP_LID, MD_WORK, MD_TERM } MD_phase; -typedef enum {MD_FINISH = 0, MD_NORMAL, MD_UNKNOWN, MD_NOT_STARTED} MD_status; //TODO MD_RUN instead of NORMAL +typedef enum {MD_FINISH = 0, MD_NORMAL, MD_UNKNOWN, MD_NOT_STARTED} MD_status; /* TODO MD_RUN instead of NORMAL */ /*Timeout values for WORK stage */ static const int send2father_work_timeout = 10; @@ -121,8 +121,12 @@ int recv_children_timeout ; int recv_jobs_timeout ; - const char *command_params; - const char *command; + int mpi_num_of_params; /* number of mpi parametres that we pass to mpilib */ + char *mpi_params; /* mpi parametres VIADEV_.... 
*/ + char *wd; /* current work directory */ + char *command; + char *remote_sh_command; + char *remote_sh_args; char *root_hostname; int mpi_param_len; @@ -176,9 +180,10 @@ void md_listen_parent(int sd); void md_general_listen(); void md_socket_close(); -void md_base_init(int ch_num, int width, const char * command_params,const char * command, - const char * root_host,int root_ch_num, int mpirun_port, int ppid); - +void md_base_init(int ch_num, int width, const char * remote_sh_command, + const char * remote_sh_args, int mpirun_port, int ppid, + const char * root_host, int root_ch_num, char * wd, + int num_of_params, const char * mpi_params, const char * command); void md_start_node_tree(); int md_start_node(ChildrenListIterator i); void md_run_own_jobs(); @@ -204,6 +209,9 @@ void md_build_command_string(int i); void md_list_print(); static void get_display_str(char * display); +static void setenv_mpi_params(); +static char** str_to_argv(const char *str); +static char* add_word_to_string(char *str, const char *elem ); void timeout_handler (int delta_time); void sigpipe_handler(int signo); @@ -245,21 +253,24 @@ MD_PRINT(DDEBUG,"\nNode %d", j); } } - fprintf(stderr,"\n"); + /* fprintf(stderr,"\n"); */ } /** Create a root instance of MiniDaemon This function should be **/ -void minidaemon_create(process * procList, int nproc, int width, const char * command_params, const char * command, int mpirun_port, int ppid) { +void minidaemon_create(process * procList, int nproc, int width, int mpirun_port, int ppid, + const char * remote_sh_command, const char * remote_sh_args, char * wd, int num_of_params, + const char * mpi_params, const char * command) { int i; - MD_PRINT(DDEBUG," Starting root minidaemon with following parameters:"); - MD_PRINT(DDEBUG,"nproc = %d, width = %d, command = %s,\n\t command_params = %s\n", - nproc,width,command,command_params); - md_base_init(nproc, width, command_params, command,NULL,nproc,mpirun_port, ppid ); + MD_PRINT(DDEBUG," Starting root 
minidaemon with following parameters:\n"); + MD_PRINT(DDEBUG,"nproc = %d, width = %d, command = %s, mpi_params = %s\n", + nproc, width, command, mpi_params); + md_base_init(nproc, width, remote_sh_command, remote_sh_args, + mpirun_port, ppid, NULL, nproc, wd, num_of_params, mpi_params, command); for (i = 0; i < nproc; ++i) { - //TODO memcpy(&md_entity.childrenList[i], &procList[i], sizeof(procList[i].hostname)+sizeof(procList[i].device) + sizeof(procList[i].port)); + /* TODO memcpy(&md_entity.childrenList[i], &procList[i], sizeof(procList[i].hostname)+sizeof(procList[i].device) + sizeof(procList[i].port)); */ md_entity.childrenList[i].hostname = procList[i].hostname; md_entity.childrenList[i].device = procList[i].device; md_entity.childrenList[i].port = procList[i].port; @@ -281,14 +292,18 @@ } /* Init all data member structures */ -void minidaemon_init ( const char * par_name , int ch_num, int width, const char * command_params,const char * command, - const char * root_host , int root_ch_num, int mpirun_port, int ppid, int array_it) { - - MD_PRINT(DDEBUG," Starting minidaemon with following parameters:"); - MD_PRINT(DDEBUG,"parent_name =%s,ch_num = %d, width = %d, command = %s,command_params = %s\n command=%s, root_host=%s, root_ch_num=%d,\ - mpirun_port=%d",par_name,ch_num,width,command,command_params,command,root_host,root_ch_num,mpirun_port,ppid); +void minidaemon_init ( const char * par_name , int ch_num, int width, const char * remote_sh_command, + const char * remote_sh_args, const char * root_host , int root_ch_num, int mpirun_port, + int ppid, int array_it, char * wd, int num_params, const char * mpi_params, const char * command) +{ + + MD_PRINT(DDEBUG," Starting minidaemon with following parameters:\n"); + MD_PRINT(DDEBUG, + "parent_name=%s, ch_num=%d, width=%d, mpi_params=%s\ncommand=%s, root_host=%s, root_ch_num=%d, mpirun_port=%d, wd=%s, num_params=%d\n", + par_name, ch_num, width, mpi_params, command, root_host, root_ch_num, mpirun_port, wd, 
num_params); - md_base_init(ch_num, width, command_params, command, root_host,root_ch_num, mpirun_port, ppid); + md_base_init(ch_num, width, remote_sh_command, remote_sh_args, + mpirun_port, ppid, root_host, root_ch_num, wd, num_params, mpi_params, command); md_entity.parent_name = par_name; md_entity.isParent = 0; @@ -297,8 +312,10 @@ } -void md_base_init(int ch_num, int width, const char * command_params,const char * command, const char * root_host ,int root_ch_num, int mpirun_port, int ppid) { - +void md_base_init(int ch_num, int width, const char * remote_sh_command, const char * remote_sh_args, + int mpirun_port, int ppid, const char * root_host, int root_ch_num, char * wd, + int num_of_params, const char * mpi_params, const char * command) +{ if ( !(md_entity.children_md_arr = malloc((width+ch_num+1) * sizeof(struct md_entry_t))) || !(md_entity.childrenList = malloc(ch_num * sizeof(struct childrenList_t))) ) { MD_SYS_ERROR(" malloc failed"); @@ -309,9 +326,13 @@ md_entity.root_hostname = (char * ) root_host; md_entity.tree_width = width; - md_entity.command_params = command_params; - md_entity.mpi_param_len = strlen(command_params); - md_entity.command = command; + md_entity.mpi_num_of_params = num_of_params; + md_entity.mpi_params = strdup(mpi_params); + md_entity.mpi_param_len = strlen(mpi_params); + md_entity.wd = strdup(wd); + md_entity.command = strdup(command); + md_entity.remote_sh_command = strdup(remote_sh_command); + md_entity.remote_sh_args = strdup(remote_sh_args); md_entity.max_fd_num = 0; md_entity.total_running_mds = 0; @@ -386,7 +407,7 @@ MD_PRINT(DDEBUG,"Waiting for children to connect...\n"); int loop = md_entity.total_running_jobs +1 ; - //TODO: The following block should be spinned-of as a private method (function) + /* TODO: The following block should be spinned-of as a private method (function) */ struct timeval timeout; timeout.tv_sec = ACCEPT_TIMEOUT; timeout.tv_usec = 0; @@ -415,7 +436,7 @@ exit(MD_EXIT_SYS_ERROR); } if (FD_ISSET (s, 
&mask)) { - //TODO : yet another try + /* TODO : yet another try */ /* There may not always be a connection waiting after a SIGIO is delivered or select(2) or poll(2) return a readability event because the connection might have been removed by an asynchronous network error or another thread before accept is called. If this happens then the call will block @@ -433,12 +454,12 @@ ? md_entity.max_fd_num : md_entity.children_md_arr[loop].fd) ; MD_PRINT(DDEBUG,"Another one had connected to parent\n"); md_send_init_message(md_entity.children_md_arr[loop].fd); - /*children_md_arr[loop].sock_addr = clnt_sock_name;*/ //TODO ? + /*children_md_arr[loop].sock_addr = clnt_sock_name;*/ /* TODO ? */ ++loop; } break; } - //TODO real update socket status with time delta + /* TODO real update socket status with time delta */ timeout.tv_sec = ACCEPT_TIMEOUT; timeout.tv_usec = 0; FD_ZERO(&mask); @@ -498,7 +519,7 @@ exit(MD_EXIT_SYS_ERROR); } - //TODO host->h_addr_list + /* TODO host->h_addr_list */ memcpy ( (char*) &srv_sock_name.sin_addr, host->h_addr, host->h_length); @@ -624,21 +645,25 @@ /* Launch the tree of minidaemons over the cluster */ +#define NPROC_LEN 8 +#define WIDTH_LEN 8 void md_start_node_tree() { char * minidaemon_command; minidaemon_command = mkstr("%s%s%s", XSTR(MPI_PREFIX),XSTR(MPI_BIN), XSTR(MINIDAEMON_CLIENT_NAME) ); - /* STR_CONCAT(XSTR(MPI_PREFIX), XSTR(MPI_BIN), XSTR(MINIDAEMON_CLIENT_NAME)) ; // XSTR(MINIDAEMON_CLIENT_NAME) ; */ + /* STR_CONCAT(XSTR(MPI_PREFIX), XSTR(MPI_BIN), XSTR(MINIDAEMON_CLIENT_NAME)) ; XSTR(MINIDAEMON_CLIENT_NAME) ; */ int nproc, width; - const int nproc_len = 8; - const int width_len = 8; - char nproc_str[nproc_len]; - char root_nproc_str[nproc_len]; - char width_str[width_len]; - char mpiport_str[nproc_len]; - char ppid_str[nproc_len]; - char array_it_str[nproc_len]; + char nproc_str[NPROC_LEN]; + char root_nproc_str[NPROC_LEN]; + char width_str[WIDTH_LEN]; + char mpiport_str[NPROC_LEN]; + char ppid_str[NPROC_LEN]; + char 
array_it_str[NPROC_LEN]; + char num_of_params_str[NPROC_LEN]; + int md_node_counter; + ChildrenListIterator i, j, own_jobs_num; + int md_step; + int md_total; - /* It's very important caclulation According to the algorithm, the minidaemon should run its own jobs , then divide the rest of nodes array between (maximum) width minidaemons. @@ -647,15 +672,17 @@ After, MD2 will run an additional MD3 with array={host3} (lead mindaemon ) and MD4 will run an additional MD5 with array = {host5, host5} */ - int md_node_counter = md_get_node_counter(); + + md_node_counter = md_get_node_counter(); if (md_node_counter < 1) { md_entity.isLeaf = 1; MD_PRINT(DDEBUG,"We have reached the bottom level : this minidaemon is a leaf\n"); return ; } - ChildrenListIterator i = getListNextNode(0,1); - int md_step = (md_node_counter / md_entity.tree_width) + (md_node_counter % md_entity.tree_width ? 1 : 0); - int md_total = md_node_counter; + + i = getListNextNode(0,1); + md_step = (md_node_counter / md_entity.tree_width) + (md_node_counter % md_entity.tree_width ? 1 : 0); + md_total = md_node_counter; MD_PRINT(DDEBUG,"Starting node tree, total node counter is %d, md_step = %d \n",md_node_counter,md_step); while (md_node_counter > 0) { md_node_counter -= md_step; @@ -663,7 +690,6 @@ md_entity.childrenList[i].child_list_len = nproc; MD_PRINT(DDEBUG," nproc=%d , i=%d\n",nproc,i); assert (nproc > 0); - sprintf(width_str, "%d " ,md_entity.tree_width); sprintf(nproc_str, "%d " , nproc); @@ -671,34 +697,93 @@ sprintf(mpiport_str, "%d ", md_entity.mpirun_port); sprintf(ppid_str, "%d ", md_entity.ppid); sprintf(array_it_str,"%d ", i); + sprintf(num_of_params_str,"%d ", md_entity.mpi_num_of_params); md_entity.childrenList[i].pid=fork(); if (md_entity.childrenList[i].pid == 0) { int j; - MD_PRINT(DDEBUG,"Starting Child Minidaemon No. 
%d of %d at host number %s ,Exec command : %s\n", - md_total-md_node_counter, md_total,md_entity.childrenList[i].hostname, minidaemon_command); + MD_PRINT(DDEBUG,"Starting Child Minidaemon No. %d of %d at host number %s ,Exec command : %s Parent %d\n", + md_total-md_node_counter, md_total,md_entity.childrenList[i].hostname, minidaemon_command, md_entity.isParent); if (!md_entity.isParent) close(md_entity.parent_fd); - execl(RSH_CMD,RSH_CMD,md_entity.childrenList[i].hostname,minidaemon_command, - md_entity.childrenList[0].hostname,nproc_str,width_str,md_entity.command_params, - md_entity.command,md_entity.root_hostname, root_nproc_str,mpiport_str, ppid_str, array_it_str,NULL); + MD_PRINT(DDEBUG,"Running RSH/SSH: %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n", + md_entity.remote_sh_command, + md_entity.remote_sh_command, + md_entity.remote_sh_args, + md_entity.childrenList[i].hostname , + minidaemon_command , + md_entity.childrenList[0].hostname, + nproc_str, + width_str, + md_entity.remote_sh_command, + md_entity.remote_sh_args, + md_entity.root_hostname, + root_nproc_str, + mpiport_str, + ppid_str, + array_it_str, + md_entity.wd, + num_of_params_str, + md_entity.mpi_params, + md_entity.command ); + + if (strcmp(md_entity.remote_sh_args,"NOPARAM")) { + execl(md_entity.remote_sh_command, + md_entity.remote_sh_command, + md_entity.remote_sh_args, + md_entity.childrenList[i].hostname, + minidaemon_command, + md_entity.childrenList[0].hostname, + nproc_str, + width_str, + md_entity.remote_sh_command, + md_entity.remote_sh_args, + md_entity.root_hostname, + root_nproc_str, + mpiport_str, + ppid_str, + array_it_str, + md_entity.wd, + num_of_params_str, + md_entity.mpi_params, + md_entity.command, NULL); + } else { + execl(md_entity.remote_sh_command, + md_entity.remote_sh_command, + md_entity.childrenList[i].hostname, + minidaemon_command, + md_entity.childrenList[0].hostname, + nproc_str, + width_str, + md_entity.remote_sh_command, + 
md_entity.remote_sh_args, + md_entity.root_hostname, + root_nproc_str, + mpiport_str, + ppid_str, + array_it_str, + md_entity.wd, + num_of_params_str, + md_entity.mpi_params, + md_entity.command, NULL); + } /*If we've reached this line, ssh failed*/ MD_SYS_ERROR ("RSH/SSH command failed!"); - } - int j; + } i += nproc; for (j = 0 ; j < i ; ++j) { - md_entity.childrenList[i].proc_state = MD_NORMAL; //TODO insert negative number to status message + md_entity.childrenList[j].proc_state = MD_NORMAL; /* TODO insert negative number to status message */ } md_entity.total_running_mds +=1; - int own_jobs_num = (int) getListNextNode(0,1); + own_jobs_num = getListNextNode(0,1); md_entity.children_md_arr[own_jobs_num + md_entity.total_running_mds].stat = MD_WORK; md_entity.children_md_arr[own_jobs_num + md_entity.total_running_mds].resp_stat = !resp_status_def; MD_PRINT(DDEBUG,"own_jobs_num=%d, md_entity.total_running_mds=%d, !resp_status_def = %d\n", own_jobs_num,md_entity.total_running_mds,!resp_status_def); } + return ; /* parent process */ } @@ -714,7 +799,7 @@ char rank_str[max_rank_len]; char port_str[max_port_len]; int curr_param_len = 0; - static const char env_command[13]="/usr/bin/env"; + do { int pd[2]; @@ -726,6 +811,7 @@ md_build_command_string(i/*,&curr_command*/); MD_PRINT(DDEBUG," : Starting children processes\n"); if ((md_entity.childrenList[i].pid = fork())== 0) { /* Affinity should be set in vianit.c */ + char **params; close(pd[0]); /* closing read side of pipe in the child process */ /* close all other filedescriptors, i.e. 
all except */ @@ -735,19 +821,29 @@ } MD_PRINT(DINFO,"Starting child process: %s\n", md_entity.command); - execl(md_entity.command, md_entity.command, NULL); + setenv_mpi_params(); + MD_PRINT(DDEBUG,"cd: %s\n", md_entity.wd); + if (chdir(md_entity.wd) < 0) { + MD_SYS_ERROR("Failed change directory\n"); + exit(MD_EXIT_SYS_ERROR); + } + /* Run the application */ + params = str_to_argv(md_entity.command); + if (NULL == params) { + MD_SYS_ERROR("Failed to allocate memory for argv\n"); + exit(MD_EXIT_SYS_ERROR); + } + execv(params[0], params); MD_SYS_ERROR("execl failed"); - - } - else { + } else { /* close write side */ free(curr_command); close(pd[1]); - md_entity.children_md_arr[i].stat = MD_WORK; //TODO - REPAIR THIS LINE + md_entity.children_md_arr[i].stat = MD_WORK; /* TODO - REPAIR THIS LINE */ md_entity.childrenList[i].proc_state = MD_NORMAL; md_entity.children_md_arr[i].resp_stat = !resp_status_def; ++i; - ++md_entity.total_running_jobs; //TODO = i + ++md_entity.total_running_jobs; /* TODO = i */ md_entity.max_fd_num = ( md_entity.max_fd_num > pd[0] ? 
md_entity.max_fd_num : pd[0]) ; } @@ -761,9 +857,118 @@ /* Free the resources */ free(curr_command_params); curr_command_params = NULL; - //TODO while (i < MIN (md_entity.ch_num , getNextListNode(0,1)) + /* TODO while (i < MIN (md_entity.ch_num , getNextListNode(0,1)) */ } +static char** str_to_argv(const char *str) { + char **argv=NULL; + char *delims = " \t"; + int count = 0; + char* word; + char* l_str=strdup(str); + + if (NULL == l_str) { + MD_SYS_ERROR("Failed to allocate memory for l_str in md_str_to_argv\n"); + return NULL; + } + + word = strtok(l_str, delims); + while (NULL != word) { + count++; + word = strtok(NULL, delims); + } + + argv = (char**)malloc(sizeof(char*) * (count + 1)); /* +1 for NULL */ + if (NULL == argv) { + MD_SYS_ERROR("Failed to allocate memory for argv in md_str_to_argv\n"); + return NULL; + } + + count = 0; + free(l_str); + l_str=strdup(str); + word = strtok(l_str, delims); + MD_PRINT(DDEBUG, "Adding param to argv : %s\n", word); + while (NULL != word) { + argv[count] = strdup(word); + if (NULL == argv[count]) { + MD_SYS_ERROR("Failed to allocate memory for argv element in md_str_to_argv\n"); + return NULL; + } + count ++; + word = strtok(NULL, delims); + MD_PRINT(DDEBUG, "Adding param to argv : %s\n", word); + } + argv[++count] = NULL; + return argv; +} + +char* md_argv_to_string(const int start, const int end, char **argv ) { + int i; + char *res = NULL; + for (i = start; i < end; i++) { + res = add_word_to_string(res, argv[i]); + } + return res; +} + +#define STR_DEF_LEN 128 +static char* add_word_to_string(char *str, const char *elem ) +{ + int str_left; + int elem_len; + + if (NULL == str) { + if (NULL == (str = (char*)malloc(sizeof(char) * STR_DEF_LEN))) { + MD_SYS_ERROR("Malloc of str failed in md_add_to_string\n"); + } + str_left = STR_DEF_LEN - 1; + } else { + str_left = STR_DEF_LEN - (strlen(str) + 1) - 1; + } + + elem_len = strlen(elem); + if (elem_len > str_left) { + int len = + (STR_DEF_LEN > elem_len + 1 ? 
STR_DEF_LEN : elem_len + 1) + strlen(str); + if ((str = realloc(str, len)) == NULL) { + MD_SYS_ERROR("Realloc failed in md_env_to_string\n"); + } + str_left = STR_DEF_LEN - 1; + } + str = strcat(str, " "); + str = strcat(str, elem); + return str; +} + +static void setenv_mpi_params() +{ + char *env_last; + char env_delims[] = "="; + char *env_list = NULL; + char *elem_last; + char list_delims[] = " \t"; + char *elem_list = strdup(md_entity.mpi_params); + + env_list = strtok_r(elem_list, list_delims, &elem_last); + while (NULL != env_list) { + char *name, *value; + name = strtok_r(env_list, env_delims, &env_last); + if (NULL == name ) { + MD_SYS_ERROR("Failed to read name parametr\n"); + } + + value = strtok_r(NULL, env_delims, &env_last); + if (NULL == value) { + MD_SYS_ERROR("Failed to read value parametr\n"); + } + /* Setting envarement */ + MD_PRINT(DDEBUG,"Setenv: %s=%s\n", name, value); + setenv(name, value, 1); + env_list = strtok_r(NULL, list_delims, &elem_last); + } +} + void md_build_command_string(int i /*, char ** exec_command*/) { char *xterm_command; char xterm_title[100]; @@ -805,8 +1010,6 @@ setenv("VIADEV_PORT",port_str,1); } - //TODO insert MPI PARAMETER parsing - ld_library_path = getenv("LD_LIBRARY_PATH"); MD_PRINT(DDEBUG,"LD_LIBRARY_PATH=%s\n",ld_library_path); @@ -841,7 +1044,7 @@ } -//TODO rebuild minidaemonListen() to be a "Class" with general methods, printf("Hostname received: %s\n",passed by parameters +/* TODO rebuild minidaemonListen() to be a "Class" with general methods, printf("Hostname received: %s\n",passed by parameters */ void md_general_listen() { /* Registrate Event Handlers, if any */ @@ -870,7 +1073,7 @@ MD_PRINT(DDEBUG,"The sockets table for process No. 
%d are:",md_entity.pid); for (j=0; j < 1 + md_entity.total_running_jobs + md_entity.total_running_mds; ++j) MD_PRINT(DDEBUG,"%d, ",md_entity.children_md_arr[j].fd); - fprintf(stderr,"\n"); + /* fprintf(stderr,"\n"); */ /*signal(SIGPIPE, sigpipe_handler);*/ signal(SIGTERM, sigpipe_handler); @@ -928,7 +1131,7 @@ md_handler(md_entity.children_md_arr[i].fd,i); /*for parent and children md messages */ md_update_socket_status(i,(int)(time(NULL) - time_wasted)); } - //TODO : res optimization + /* TODO : res optimization */ /*if(--res == 0 ) break;*/ } @@ -965,13 +1168,13 @@ and timeout_other is timeout for own-jobs-termination */ case MD_TERM : /* here we should wait only for children process and children minidaemons */ - //TODO propagation delay, i.e. to_value = f(base_to_value,tree_depth) + /* TODO propagation delay, i.e. to_value = f(base_to_value,tree_depth) */ sleep(1); if (md_entity.total_running_jobs !=0) { md_forced_cleanup_handler(); } /* the last status Message we've send */ - //TODO Send Timeout with forced FINISH + /*TODO Send Timeout with forced FINISH */ if (!md_entity.isParent) { md_send_status_message(md_entity.total_running_jobs,-1); } @@ -995,7 +1198,6 @@ /*md_entity.children_md_arr[ind].stat = */ waitpid (md_entity.childrenList[ind].pid,&loc_state,WNOHANG); MD_PRINT(DDEBUG," Job %d was finished with status %d\n",md_entity.childrenList[ind].pid, WEXITSTATUS (loc_state)); - //MD_PRINT(DINFO,"The exit status of job %d is %d\n",ind,WEXITSTATUS (loc_state)); md_entity.childrenList[ind].proc_state = (WIFEXITED(loc_state) ? WEXITSTATUS (loc_state) : -1); MD_PRINT(DINFO,"The exit status of job %d is %d\n",ind,md_entity.childrenList[ind].proc_state); md_entity.md_exit_value = (md_entity.childrenList[ind].proc_state == MD_EXIT_NORMAL @@ -1039,23 +1241,21 @@ void md_print_status_message() { int i; if (md_entity.md_exit_value == MD_EXIT_NORMAL) { - MD_PRINT(DNONE,"All user jobs finished normally. 
Minidaemon will shutdown now\n"); + MD_PRINT(DDEBUG,"All user jobs finished normally. Minidaemon will shutdown now\n"); } else { - /*MD_PRINT(DNONE,"MPI run finished,printing the exit status of all mpi jobs: ");*/ - fprintf(stderr,"One ore more user jobs finished with bad status, printing:\n"); - fflush(stderr); + MD_PRINT(DDEBUG,"MPI run finished,printing the exit status of all mpi jobs: "); for (i = 0; i < md_entity.ch_num ; ++i) { - fprintf(stderr,"%d ", md_entity.childrenList[i].proc_state); fflush(stderr); + MD_PRINT(DDEBUG,"%d ", md_entity.childrenList[i].proc_state); } - fprintf(stderr,"\n"); fflush(stderr); + /* fprintf(stderr,"\n"); fflush(stderr); */ } } -//TODO array of pointer to appropriate function , i.e. func_array[msg_buff[0]].handler(); +/* TODO array of pointer to appropriate function , i.e. func_array[msg_buff[0]].handler(); */ void md_handler(int sd, int i) { - static int msg_buf[MAX_MESSAGE_SIZE]; //TODO spin it out + static int msg_buf[MAX_MESSAGE_SIZE]; /* TODO spin it out */ static const int small_msg_size = sizeof(int) * 2; static const int big_msg_size = sizeof(int) * 4; int n = 0; @@ -1071,7 +1271,7 @@ return; } MD_PRINT(DDEBUG,"Handling incoming message of size %d\n",n); - //TODO Once we got msg_buf[0], we can read the rest of the data (md_read_gen_type) according to its type + /* TODO Once we got msg_buf[0], we can read the rest of the data (md_read_gen_type) according to its type */ switch ( msg_buf[0]) { case STATUS_REQUEST: @@ -1160,12 +1360,12 @@ } -// TODO : define which set - parent, md or proc children should be set +/* TODO : define which set - parent, md or proc children should be set */ void md_rebuild_mask(fd_set * mask) { int i; FD_ZERO(mask); - //TODO not curr_running, but total_running; + /* TODO not curr_running, but total_running; */ MD_PRINT(DDEBUG,"\n"); for (i = 0 ; i < md_entity.total_running_jobs; ++i) { if (md_entity.children_md_arr[i].stat != MD_do_not_set) { @@ -1287,7 +1487,7 @@ } } -//TODO optimize with receive 
width times at the client side +/* TODO optimize with receive width times at the client side */ void md_send_init_message(int fd) { MD_PRINT(DDEBUG," : starting\n"); ChildrenListIterator it = 0, array_it; Modified: mvapich/trunk/mpid/ch_gen2/process/minidaemon.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/minidaemon.h 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/minidaemon.h 2008-01-03 19:25:29 UTC (rev 1759) @@ -44,21 +44,24 @@ /* Support for debug prints. */ -/*#define DNONE 0*/ /* Message with this debug level will be always printed */ -/*#define DINFO 1*/ /* Info messages level is applicable for rare and informative messages */ -/*#define DDEBUG 2 *//* Debug level, should be turned off in normal run */ -/*#define DPATH 3 *//* The highest level, for use in loops or critical sections */ +#define DNONE 0 /* Message with this debug level will be always printed */ +#define DINFO 1 /* Info messages level is applicable for rare and informative messages */ +#define DDEBUG 2 /* Debug level, should be turned off in normal run */ +#define DPATH 3 /* The highest level, for use in loops or critical sections */ -/*#define DGLOBAL_LEVEL 3*/ +#ifndef DGLOBAL_LEVEL +#define DGLOBAL_LEVEL -1 +#endif /* DGLOBAL_LEVEL should be defined in compilation stage, otherwise it should be zero */ -/*#define MD_PRINT(dlevel,fmt, args...) {if (dlevel <= DGLOBAL_LEVEL){\ +#if DGLOBAL_LEVEL >= 0 +#define MD_PRINT(dlevel,fmt, args...) {if (dlevel <= DGLOBAL_LEVEL){\ fprintf(stderr, "[%s:%d, pid=%d]", __FILE__, __LINE__,getpid());\ - fprintf(stderr, fmt, ## args); fflush(stderr);}}*/ - + fprintf(stderr, fmt, ## args); fflush(stderr);}} +#else +#define MD_PRINT(dlevel,fmt, args...) 
+#endif - - /** Minidaemon ADT provided by Mellanox, MPI Team @@ -93,13 +96,17 @@ typedef struct minidaemon_t * Minidaemon; /*typedef struct process_t process;*/ -void minidaemon_create(process * procList, int nproc, int width,const char * command_params,const char * command, int mpirun_port,int ppid); +void minidaemon_create(process * procList, int nproc, int width, int mpirun_port, int ppid, + const char * remote_sh_command, const char * remote_sh_args, char * wd, int num_of_params, + const char * mpi_params, const char * command); /* Init all data member structures */ -void minidaemon_init (const char * par_name , int ch_num, int width, const char * command_params,const char * command, - const char * root_hostname, int root_ch_num,int mpirun_port, int ppid, int array_it); +void minidaemon_init ( const char * par_name , int ch_num, int width, const char * remote_sh_command, + const char * remote_sh_args,const char * root_host , int root_ch_num, int mpirun_port, + int ppid, int array_it, char * wd, int num_params, const char * mpi_params, const char * command); /* Start listening to messages from other Minidaemons */ void minidaemon_run (); +char * md_argv_to_string(int start, int end, char **argv ); #endif /* _MINIDAEMON_H */ Modified: mvapich/trunk/mpid/ch_gen2/process/minidaemon_client.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/minidaemon_client.c 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/minidaemon_client.c 2008-01-03 19:25:29 UTC (rev 1759) @@ -1,31 +1,54 @@ #include #include "minidaemon.h" -#define DEFAULT_PARAM_NUMBER 10 -#define NAME_INDEX 1 -#define NUM_INDEX 2 -#define WIDTH_INDEX 3 -#define COM_PAR_INDEX 4 -#define COM_INDEX 5 -#define ROOT_NAME 6 -#define ROOT_CH_NUM 7 -#define MPIRUN_PORT_INDEX 8 -#define PID_INDEX 9 -#define ARRAY_IT_INDEX 10 +#define NAME_INDEX 1 +#define NUM_INDEX 2 +#define WIDTH_INDEX 3 +#define REM_SH_COM_INDEX 4 +#define 
REM_SH_ARGS_INDEX 5 +#define ROOT_NAME 6 +#define ROOT_CH_NUM 7 +#define MPIRUN_PORT_INDEX 8 +#define PID_INDEX 9 +#define ARRAY_IT_INDEX 10 +#define WD_INDEX 11 +#define NUM_PARAMS_INDEX 12 +#define PREFIX 13 +#define DEFAULT_PARAM_NUMBER (PREFIX + 1) + int main (int argc, char * argv[]) { int width; + char *mpi_params = NULL; + char *command = NULL; + + MD_PRINT(DDEBUG,"client main: Starting\n"); + /* some basic checks */ if (argc < DEFAULT_PARAM_NUMBER) { MD_USR_ERROR("Minidaemon client : too few parameters to run minidaemon"); + exit(MD_EXIT_SYS_ERROR); } if ( (width = atoi(argv[WIDTH_INDEX])) < 1) { MD_USR_ERROR("Minidaemon client : invalid tree width received"); + exit(MD_EXIT_SYS_ERROR); } - /** Minidaemon functions will exit on error **/ - /*void minidaemon_init (const char * par_name , int ch_num, int width, const char * command_params,const char * command, int root_ch_num);*/ - minidaemon_init(argv[NAME_INDEX],atoi(argv[NUM_INDEX]),atoi(argv[WIDTH_INDEX]), argv[COM_PAR_INDEX],argv[COM_INDEX], - argv[ROOT_NAME],atoi(argv[ROOT_CH_NUM]),atoi(argv[MPIRUN_PORT_INDEX]), atoi(argv[PID_INDEX]), atoi(argv[ARRAY_IT_INDEX])); + mpi_params = md_argv_to_string(PREFIX, PREFIX + atoi(argv[NUM_PARAMS_INDEX]), argv); + if (NULL == mpi_params) { + MD_SYS_ERROR("Failed to allocate memory for mpi_params\n"); + exit(MD_EXIT_SYS_ERROR); + } + + command = md_argv_to_string(PREFIX + atoi(argv[NUM_PARAMS_INDEX]), argc, argv); + if (NULL == command) { + MD_SYS_ERROR("Failed to allocate memory for mpi_params\n"); + exit(MD_EXIT_SYS_ERROR); + } + minidaemon_init (argv[NAME_INDEX], atoi(argv[NUM_INDEX]), atoi(argv[WIDTH_INDEX]), + argv[REM_SH_COM_INDEX], argv[REM_SH_ARGS_INDEX], argv[ROOT_NAME], atoi(argv[ROOT_CH_NUM]), + atoi(argv[MPIRUN_PORT_INDEX]), atoi(argv[PID_INDEX]), atoi(argv[ARRAY_IT_INDEX]), + argv[WD_INDEX], atoi(argv[NUM_PARAMS_INDEX]), mpi_params, command); + minidaemon_run(); return 0; } Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 
=================================================================== --- mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2008-01-03 19:25:29 UTC (rev 1759) @@ -64,13 +64,14 @@ int use_xlauncher = 0; int xlauncher_width = 8; int aout_index, port; -#define MAX_WD_LEN 256 -char wd[MAX_WD_LEN]; /* working directory of current process */ +char *wd; /* working directory of current process */ #define MAX_HOST_LEN 256 char mpirun_host[MAX_HOST_LEN]; /* hostname of current process */ /* xxx need to add checking for string overflow, do this more carefully ... */ char * mpispawn_param_env = NULL; int param_count = 0, legacy_startup = 0; +#define ENV_LEN 1024 +#define LINE_LEN 256 /* * Message notifying user of what timed out @@ -92,10 +93,11 @@ int start_process(int i, char *command_name, char *env); void cleanup(void); char *skip_white(char *s); -char *read_param_file(char *paramfile,char *env); +int read_param_file(char *paramfile,char **env); void process_termination(void); void wait_for_errors(int s,struct sockaddr *sockaddr,unsigned int sockaddr_len); int set_fds(fd_set * rfds, fd_set * efds); +static int read_cmdline_to_env(char **env, char *argv[]); static int read_hostfile(char *hostfile_name); void make_command_strings(int argc, char * argv[], char * totalview_cmd, char * command_name, char * command_name_tv); void mpispawn_checkin(int, struct sockaddr *, unsigned int); @@ -169,6 +171,7 @@ int addrlen, global_addrlen = 0; char *env = "\0"; + int num_of_params = 0; int tot_nread = 0; int *alladdrs = NULL; @@ -321,12 +324,12 @@ /* reading default param file */ if ( 0 == (access(PARAM_GLOBAL, R_OK))) { - env=read_param_file(PARAM_GLOBAL,env); + num_of_params += read_param_file(PARAM_GLOBAL,&env); } /* reading file specified by user env */ if (( param_env = getenv("MVAPICH_DEF_PARAMFILE")) != NULL ){ - env = read_param_file(param_env, env); + num_of_params += read_param_file(param_env, 
&env); } if (paramfile_on) { /* construct a string of environment variable definitions from @@ -334,7 +337,7 @@ * will be available to the remote processes, which * will use them to over-ride default parameter settings */ - env = read_param_file(paramfile, env); + num_of_params += read_param_file(paramfile, env); } @@ -385,7 +388,7 @@ mpirun_processes[0] = '\0'; } - getcwd(wd, MAX_WD_LEN); + wd = get_current_dir_name(); gethostname(mpirun_host, MAX_HOST_LEN); get_display_str(); @@ -452,30 +455,36 @@ if (use_xlauncher) { md_id = fork(); if (md_id == 0) { + assert (NULL != env); char command_name[COMMAND_LEN]; char command_name_tv[COMMAND_LEN]; char *ld_library_path; char *mpi_prefix; + + /* For minidemon we must to pass some parameter */ + num_of_params += read_cmdline_to_env(&env, argv); make_command_strings(argc, argv, totalview_cmd, command_name, command_name_tv); - fprintf(stderr,"Creating the instance of minidaemon, MPI command should be %s\n",command_name); ld_library_path = getenv("LD_LIBRARY_PATH"); - if (ld_library_path != NULL) { - fprintf(stderr," Setting LD_LIBRARY_PATH = %s\n",ld_library_path); - setenv("LD_LIBRARY_PATH",ld_library_path,1); - } - mpi_prefix = getenv("MPI_PREFIX"); - if (mpi_prefix != NULL) { - fprintf(stderr," Setting MPI_PREFIX = %s\n",mpi_prefix); - setenv("MPI_PREFIX",mpi_prefix,1); - } + if (ld_library_path != NULL) { + fprintf(stderr," Setting LD_LIBRARY_PATH = %s\n",ld_library_path); + setenv("LD_LIBRARY_PATH",ld_library_path,1); + } - minidaemon_create(plist,nprocs,xlauncher_width,"VIADEV_PARAM=DEMO_PARAM",command_name, port, getpid()); + mpi_prefix = getenv("MPI_PREFIX"); + if (mpi_prefix != NULL) { + fprintf(stderr," Setting MPI_PREFIX = %s\n",mpi_prefix); + setenv("MPI_PREFIX",mpi_prefix,1); + } + minidaemon_create(plist, nprocs, xlauncher_width, port, getpid(), + use_rsh ? RSH_CMD : SSH_CMD, use_rsh ? RSH_ARG : SSH_ARG, wd, + strlen(env) ? num_of_params : 1, + strlen(env) ? 
env : "VIADEV_PARAM=DEMO_PARAM", + command_name); + minidaemon_run(); /* minidaemon should not reach this line in normal flow, only special signal can be the reason */ - MD_PRINT(DNONE,"Finished minidaemon,exiting\n"); exit(MD_EXIT_MINIDAEMON_SIG); - } } @@ -920,6 +929,50 @@ return s; } +static int read_cmdline_to_env(char **env, char *argv[]) +{ + int env_left, e_len; + char *buf = NULL; + int counter = 0; + + if (0 == strlen(*env)) { + /* Allocating space for env first time */ + if (NULL == (*env = malloc(sizeof(char) * ENV_LEN))) { + fprintf(stderr, "Malloc of env failed in read_param_file\n"); + exit(EXIT_FAILURE); + } + env_left = ENV_LEN - 1; + } else { + /* already allocated */ + env_left = ENV_LEN - (strlen(*env) + 1) - 1; + } + + while(strchr(argv[aout_index], '=')) { + buf = strdup(argv[aout_index++]); + + e_len = strlen(buf); + if (e_len > env_left) { + /* oops, need to grow env string */ + int newlen = + (ENV_LEN > e_len + 1 ? ENV_LEN : e_len + 1) + strlen(*env); + if ((*env = realloc(*env, newlen)) == NULL) { + fprintf(stderr, "realloc failed in read_param_file\n"); + exit(EXIT_FAILURE); + } + if (param_debug) { + printf("realloc to %d\n", newlen); + } + env_left = ENV_LEN - 1; + } + strcat(*env, " "); + strcat(*env, buf); + ++counter; + env_left -= e_len; + free(buf); + } + return counter; +} + /* Read hostfile */ static int read_hostfile(char *hostfile_name) { @@ -1014,9 +1067,7 @@ * something like: * rsh remote_host "cd workdir; env ENVNAME=value ... 
command" */ -#define ENV_LEN 1024 -#define LINE_LEN 256 -char *read_param_file(char *paramfile,char *env) +int read_param_file(char *paramfile,char **env) { FILE *pf; char errstr[256]; @@ -1026,6 +1077,7 @@ char *p, * tmp; int num, e_len; int env_left = 0; + int num_params = 0; if ((pf = fopen(paramfile, "r")) == NULL) { sprintf(errstr, "Cant open paramfile = %s", paramfile); @@ -1033,16 +1085,16 @@ exit(EXIT_FAILURE); } - if ( strlen(env) == 0 ){ + if ( strlen(*env) == 0 ){ /* Allocating space for env first time */ - if ((env = malloc(ENV_LEN)) == NULL) { + if ((*env = malloc(ENV_LEN)) == NULL) { fprintf(stderr, "Malloc of env failed in read_param_file\n"); exit(EXIT_FAILURE); } env_left = ENV_LEN - 1; }else{ /* already allocated */ - env_left = ENV_LEN - (strlen(env) + 1) - 1; + env_left = ENV_LEN - (strlen(*env) + 1) - 1; } while (fgets(line, LINE_LEN, pf) != NULL) { @@ -1068,6 +1120,7 @@ /* construct the environment string */ buf[0] = '\0'; sprintf(buf, "%s=%s ", name, value); + ++num_params; if(mpispawn_param_env) { tmp = mkstr("%s MPISPAWN_GENERIC_NAME_%d=%s" @@ -1105,8 +1158,8 @@ if (e_len > env_left) { /* oops, need to grow env string */ int newlen = - (ENV_LEN > e_len + 1 ? ENV_LEN : e_len + 1) + strlen(env); - if ((env = realloc(env, newlen)) == NULL) { + (ENV_LEN > e_len + 1 ? 
ENV_LEN : e_len + 1) + strlen(*env); + if ((*env = realloc(*env, newlen)) == NULL) { fprintf(stderr, "realloc failed in read_param_file\n"); exit(EXIT_FAILURE); } @@ -1115,16 +1168,16 @@ } env_left = ENV_LEN - 1; } - strcat(env, buf); + strcat(*env, buf); env_left -= e_len; if (param_debug) { printf("Added: [%s]\n", buf); - printf("env len = %d, env left = %d\n", strlen(env), env_left); + printf("env len = %d, env left = %d\n", strlen(*env), env_left); } } fclose(pf); - return env; + return num_params; } void cleanup_handler(int sig) @@ -1906,6 +1959,7 @@ { int i; if (debug_on) { + fprintf (stderr,"debug enabled !\n"); char keyval_list[COMMAND_LEN]; sprintf(keyval_list, "%s", " "); /* Take more env variables if present */ Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.h 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.h 2008-01-03 19:25:29 UTC (rev 1759) @@ -42,6 +42,7 @@ #define SSH_CMD "/usr/bin/ssh" #define XTERM "/usr/X11R6/bin/xterm" #define SSH_ARG "-q" +#define RSH_ARG "NOPARAM" #ifndef LD_LIBRARY_PATH_MPI #define LD_LIBRARY_PATH_MPI "/usr/mvapich/lib/shared" @@ -81,27 +82,6 @@ #define PMGR_VERSION PMGR_COLLECTIVE -/* Support for debug prints. */ - -#define DNONE 0 /* Message with this debug level will be always printed */ -#define DINFO 1 /* Info messages level is applicable for rare and informative messages */ -#define DDEBUG 2 /* Debug level, should be turned off in normal run */ -#define DPATH 3 /* The highest level, for use in loops or critical sections */ - -#ifndef DGLOBAL_LEVEL -#define DGLOBAL_LEVEL 0 -#endif - -/* DGLOBAL_LEVEL should be defined in compilation stage, otherwise it should be zero */ -#if DGLOBAL_LEVEL >= 0 -#define MD_PRINT(dlevel,fmt, args...) 
{if (dlevel <= DGLOBAL_LEVEL){\ - fprintf(stderr, "[%s:%d, pid=%d]", __FILE__, __LINE__,getpid());\ - fprintf(stderr, fmt, ## args); fflush(stderr);}} -#else -#define MD_PRINT(dlevel,fmt, args...) -#endif - - typedef enum { P_NOTSTARTED, P_STARTED, Modified: mvapich/trunk/mpid/ch_gen2/process/mpispawn.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/mpispawn.c 2008-01-03 17:11:28 UTC (rev 1758) +++ mvapich/trunk/mpid/ch_gen2/process/mpispawn.c 2008-01-03 19:25:29 UTC (rev 1759) @@ -137,7 +137,7 @@ if(lv.mpirun_rank != 0) { int fd = open("/dev/null", O_RDWR, 0); - //dup2(fd, STDIN_FILENO); + /* dup2(fd, STDIN_FILENO); */ } argc = env2int("MPISPAWN_ARGC"); From chail at mvapich.cse.ohio-state.edu Fri Jan 4 16:05:45 2008 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Fri Jan 4 16:05:53 2008 Subject: [mvapich-commit] r1769 - mvapich/trunk Message-ID: <200801042105.m04L5jaE011536@mvapich.cse.ohio-state.edu> Author: chail Date: 2008-01-04 16:05:42 -0500 (Fri, 04 Jan 2008) New Revision: 1769 Modified: mvapich/trunk/make.mvapich.vapi Log: Remove the LAZY_MEM_UNREGISTER flag from default for VAPI device. 
Modified: mvapich/trunk/make.mvapich.vapi =================================================================== --- mvapich/trunk/make.mvapich.vapi 2008-01-04 20:09:52 UTC (rev 1768) +++ mvapich/trunk/make.mvapich.vapi 2008-01-04 21:05:42 UTC (rev 1769) @@ -84,7 +84,7 @@ export LIBS="-L${MTHOME_LIB} -lmtl_common -lvapi -lpthread -lmosal -lmpga $SUPPRESS" export FFLAGS="-L${MTHOME_LIB}" export CFLAGS="-D${ARCH} -DEARLY_SEND_COMPLETION -DSRQ -DADAPTIVE_RDMA_FAST_PATH \ - -DVIADEV_RPUT_SUPPORT -DLAZY_MEM_UNREGISTER -D_SMP_ -D_SMP_RNDV_ \ + -DVIADEV_RPUT_SUPPORT -D_SMP_ -D_SMP_RNDV_ \ $SUPPRESS -D${IO_BUS} -D${LINKS} \ ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG" From chail at mvapich.cse.ohio-state.edu Fri Jan 4 16:30:08 2008 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Fri Jan 4 16:30:16 2008 Subject: [mvapich-commit] r1770 - mvapich/trunk Message-ID: <200801042130.m04LU8sa011570@mvapich.cse.ohio-state.edu> Author: chail Date: 2008-01-04 16:30:07 -0500 (Fri, 04 Jan 2008) New Revision: 1770 Modified: mvapich/trunk/make.mvapich.vapi_multirail Log: Remove the LAZY_MEM_UNREGISTER flag from default for vapi_multirail device. 
Modified: mvapich/trunk/make.mvapich.vapi_multirail =================================================================== --- mvapich/trunk/make.mvapich.vapi_multirail 2008-01-04 21:05:42 UTC (rev 1769) +++ mvapich/trunk/make.mvapich.vapi_multirail 2008-01-04 21:30:07 UTC (rev 1770) @@ -75,7 +75,7 @@ export LIBS="-L${MTHOME_LIB} -lmtl_common -lvapi -lpthread -lmosal -lmpga $SUPPRESS" export FFLAGS="-L${MTHOME_LIB}" export CFLAGS="-D${ARCH} -DUSE_INLINE -DUSE_MRAIL -DEARLY_SEND_COMPLETION -DRDMA_FAST_PATH \ - -DVIADEV_RPUT_SUPPORT -DLAZY_MEM_UNREGISTER -D_SMP_ -D_SMP_RNDV_ \ + -DVIADEV_RPUT_SUPPORT -D_SMP_ -D_SMP_RNDV_ \ $SUPPRESS \ ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG" From chail at mvapich.cse.ohio-state.edu Fri Jan 4 16:54:39 2008 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Fri Jan 4 16:54:45 2008 Subject: [mvapich-commit] r1772 - mvapich2/trunk Message-ID: <200801042154.m04Lsdkf011623@mvapich.cse.ohio-state.edu> Author: chail Date: 2008-01-04 16:54:38 -0500 (Fri, 04 Jan 2008) New Revision: 1772 Modified: mvapich2/trunk/make.mvapich2.vapi Log: Remove the LAZY_MEM_UNREGISTER flag from default for VAPI device in mvapich2 trunk. 
Modified: mvapich2/trunk/make.mvapich2.vapi =================================================================== --- mvapich2/trunk/make.mvapich2.vapi 2008-01-04 21:32:56 UTC (rev 1771) +++ mvapich2/trunk/make.mvapich2.vapi 2008-01-04 21:54:38 UTC (rev 1772) @@ -116,7 +116,7 @@ export LD_LIBRARY_PATH=$MTHOME_LIB:$LD_LIBRARY_PATH export LIBS=${LIBS:--L${MTHOME_LIB} -lmtl_common -lvapi -lpthread -lmosal -lmpga $SUPPRESS} export FFLAGS=${FFLAGS:--L${MTHOME_LIB}} -export CFLAGS=${CFLAGS:--D${ARCH} -DONE_SIDED -DUSE_INLINE -DRDMA_FAST_PATH -DUSE_HEADER_CACHING -DLAZY_MEM_UNREGISTER -D_SMP_ -D${IO_BUS} -D${LINKS} -DMPIDI_CH3_CHANNEL_RNDV -DMPID_USE_SEQUENCE_NUMBERS -D${VCLUSTER} ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG $SUPPRESS} +export CFLAGS=${CFLAGS:--D${ARCH} -DONE_SIDED -DUSE_INLINE -DRDMA_FAST_PATH -DUSE_HEADER_CACHING -D_SMP_ -D${IO_BUS} -D${LINKS} -DMPIDI_CH3_CHANNEL_RNDV -DMPID_USE_SEQUENCE_NUMBERS -D${VCLUSTER} ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG $SUPPRESS} # Prelogue make distclean &>/dev/null From mamidala at mvapich.cse.ohio-state.edu Sun Jan 6 20:55:58 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Sun Jan 6 20:56:05 2008 Subject: [mvapich-commit] r1809 - mvapich/trunk/mpid/ch_gen2_ud Message-ID: <200801070155.m071twsk002658@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-06 20:55:58 -0500 (Sun, 06 Jan 2008) New Revision: 1809 Modified: mvapich/trunk/mpid/ch_gen2_ud/coll_shmem.h mvapich/trunk/mpid/ch_gen2_ud/shmem_coll.c Log: Applying ch_gen2 device changes for the shmem collectives Modified: mvapich/trunk/mpid/ch_gen2_ud/coll_shmem.h =================================================================== --- mvapich/trunk/mpid/ch_gen2_ud/coll_shmem.h 2008-01-07 01:31:35 UTC (rev 1808) +++ mvapich/trunk/mpid/ch_gen2_ud/coll_shmem.h 2008-01-07 01:55:58 UTC (rev 1809) @@ -91,21 +91,16 @@ #define SHMEM_BCAST_SIZE (1<<23) #define SHMEM_COLL_BLOCK_SIZE ((smpi.num_local_nodes) * 
shmem_coll_max_msg_size) #define SHMEM_COLL_OFFSET (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE) -#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + SHMEM_BCAST_SIZE) +#define FLAG_TYPES 6 +#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + sizeof(shmem_coll_region) \ + + FLAG_TYPES*shmem_coll_num_comm*smpi.num_local_nodes) + /* the shared area itself */ typedef struct { - volatile int child_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int child_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; volatile int shmem_comm_count; pthread_spinlock_t shmem_coll_lock; - /* the collective buffer */ - char shmem_coll_buf; }shmem_coll_region; @@ -113,6 +108,13 @@ struct shmem_coll_mgmt{ void *mmap_ptr; int fd; + int **child_complete_bcast; + int **root_complete_bcast; + int **child_complete_gather; + int **root_complete_gather; + int **barrier_gather; + int **barrier_bcast; + char* shmem_coll_buf; }; #define SHMEM_BCAST_FLAGS 1024 Modified: mvapich/trunk/mpid/ch_gen2_ud/shmem_coll.c =================================================================== --- mvapich/trunk/mpid/ch_gen2_ud/shmem_coll.c 2008-01-07 01:31:35 UTC (rev 1808) +++ mvapich/trunk/mpid/ch_gen2_ud/shmem_coll.c 2008-01-07 01:55:58 UTC (rev 1809) @@ -57,7 +57,7 @@ extern int shmem_coll_blocks; extern int shmem_coll_max_msg_size; - +int shmem_coll_num_comm; #undef FUNCNAME #define FUNCNAME MPIDI_CH3I_SHMEM_COLL_Init #undef FCNAME @@ -66,6 +66,8 @@ { int pagesize = getpagesize(); + shmem_coll_num_comm = shmem_coll_blocks; + /* add pid for unique file name */ shmem_file = (char *) 
malloc(sizeof(char) * (SHMEM_COLL_HOSTNAME_LEN + 26 + PID_CHAR_LEN)); @@ -152,18 +154,56 @@ } shmem_coll = (shmem_coll_region *) shmem_coll_obj.mmap_ptr; + shmem_coll_obj.child_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.child_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + + shmem_coll_obj.child_complete_bcast[0] = (int*)((char*)(shmem_coll_obj.mmap_ptr) + sizeof(shmem_coll)); + shmem_coll_obj.root_complete_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.child_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 2*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.root_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 3*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 4*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 5*shmem_coll_num_comm*smpi.num_local_nodes); + + + for (j=1;j<shmem_coll_num_comm;j++){ [... text lost in archive extraction ...] - shmem_coll->child_complete_bcast[j][i] = 1; + for(j=0; j < shmem_coll_num_comm; j++){ + for (i = 0; i < smpi.num_local_nodes; i++){ + shmem_coll_obj.child_complete_bcast[j][i] = 1; } - for (i = 0; i < SHMEM_COLL_NUM_PROCS; i++){ - shmem_coll->root_complete_gather[j][i] = 1; + for (i = 0; i < smpi.num_local_nodes; i++){ + shmem_coll_obj.root_complete_gather[j][i] = 1; } } pthread_spin_init(&shmem_coll->shmem_coll_lock,0); } + + shmem_coll_obj.shmem_coll_buf = + (char*)(shmem_coll_obj.child_complete_bcast[0] + 6*shmem_coll_num_comm*smpi.num_local_nodes); return MPI_SUCCESS; 
} @@ -197,30 +237,30 @@ void MPID_SHMEM_COLL_GetShmemBuf(int size, int rank, int shmem_comm_rank, void** output_buf) { int i,myid; - char* shmem_coll_buf = (char*)(&(shmem_coll->shmem_coll_buf)); + char* shmem_coll_buf = shmem_coll_obj.shmem_coll_buf; myid = rank; if (myid == 0){ for (i=1; i < size; i++){ - while (shmem_coll->child_complete_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; } /* Set the completion flags back to zero */ for (i=1; i < size; i++){ - shmem_coll->child_complete_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] = 0; } *output_buf = (char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } else{ - while (shmem_coll->root_complete_gather[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; - shmem_coll->root_complete_gather[shmem_comm_rank][myid] = 0; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] = 0; *output_buf = (char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } } @@ -283,11 +323,11 @@ if (myid == 0){ for (i=1; i < size; i++){ - shmem_coll->root_complete_gather[shmem_comm_rank][i] = 1; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][i] = 1; } } else{ - shmem_coll->child_complete_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][myid] = 1; } } @@ -298,17 +338,17 @@ if (rank == 0){ for (i=1; i < size; i++){ - while (shmem_coll->barrier_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.barrier_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } } for (i=1; i < size; i++){ - shmem_coll->barrier_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.barrier_gather[shmem_comm_rank][i] = 0; } } else{ - shmem_coll->barrier_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.barrier_gather[shmem_comm_rank][myid] = 1; } 
} @@ -319,15 +359,15 @@ if (rank == 0){ for (i=1; i < size; i++){ - shmem_coll->barrier_bcast[shmem_comm_rank][i] = 1; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][i] = 1; } } else{ - while (shmem_coll->barrier_bcast[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } - shmem_coll->barrier_bcast[shmem_comm_rank][myid] = 0; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] = 0; } MPID_DeviceCheck(MPID_NOTBLOCKING); } From mamidala at mvapich.cse.ohio-state.edu Sun Jan 6 21:05:47 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Sun Jan 6 21:05:53 2008 Subject: [mvapich-commit] r1810 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801070205.m0725lfl002699@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-06 21:05:46 -0500 (Sun, 06 Jan 2008) New Revision: 1810 Modified: mvapich/trunk/mpid/ch_gen2/shmem_coll.c Log: checking in minor fix for macro defn. 
Modified: mvapich/trunk/mpid/ch_gen2/shmem_coll.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/shmem_coll.c 2008-01-07 01:55:58 UTC (rev 1809) +++ mvapich/trunk/mpid/ch_gen2/shmem_coll.c 2008-01-07 02:05:46 UTC (rev 1810) @@ -196,10 +196,10 @@ if (smpi.my_local_id == 0){ memset(shmem_coll_obj.mmap_ptr, 0, shmem_coll_size); for(j=0; j < shmem_coll_num_comm; j++){ - for (i = 0; i < SHMEM_COLL_NUM_PROCS; i++){ + for (i = 0; i < smpi.num_local_nodes; i++){ shmem_coll_obj.child_complete_bcast[j][i] = 1; } - for (i = 0; i < SHMEM_COLL_NUM_PROCS; i++){ + for (i = 0; i < smpi.num_local_nodes; i++){ shmem_coll_obj.root_complete_gather[j][i] = 1; } } From mamidala at mvapich.cse.ohio-state.edu Sun Jan 6 21:06:56 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Sun Jan 6 21:07:07 2008 Subject: [mvapich-commit] r1811 - mvapich/trunk/mpid/ch_smp Message-ID: <200801070206.m0726uIO002709@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-06 21:06:56 -0500 (Sun, 06 Jan 2008) New Revision: 1811 Modified: mvapich/trunk/mpid/ch_smp/coll_shmem.h mvapich/trunk/mpid/ch_smp/shmem_coll.c Log: checking in changes from ch_gen2 device (shmem collectives) Modified: mvapich/trunk/mpid/ch_smp/coll_shmem.h =================================================================== --- mvapich/trunk/mpid/ch_smp/coll_shmem.h 2008-01-07 02:05:46 UTC (rev 1810) +++ mvapich/trunk/mpid/ch_smp/coll_shmem.h 2008-01-07 02:06:56 UTC (rev 1811) @@ -91,21 +91,17 @@ #define SHMEM_BCAST_SIZE (1<<23) #define SHMEM_COLL_BLOCK_SIZE ((smpi.num_local_nodes) * shmem_coll_max_msg_size) #define SHMEM_COLL_OFFSET (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE) -#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + SHMEM_BCAST_SIZE) +#define FLAG_TYPES 6 +#define SHMEM_COLL_BUF_SIZE (shmem_coll_blocks * SHMEM_COLL_BLOCK_SIZE + sizeof(shmem_coll_region) \ + + 
FLAG_TYPES*shmem_coll_num_comm*smpi.num_local_nodes) /* the shared area itself */ typedef struct { - volatile int child_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int child_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; /* use for initial synchro */ - volatile int root_complete_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; - volatile int barrier_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; + volatile int shmem_comm_count; pthread_spinlock_t shmem_coll_lock; /* the collective buffer */ - char shmem_coll_buf; }shmem_coll_region; @@ -113,6 +109,13 @@ struct shmem_coll_mgmt{ void *mmap_ptr; int fd; + int **child_complete_bcast; + int **root_complete_bcast; + int **child_complete_gather; + int **root_complete_gather; + int **barrier_gather; + int **barrier_bcast; + char* shmem_coll_buf; }; #define SHMEM_BCAST_FLAGS 1024 Modified: mvapich/trunk/mpid/ch_smp/shmem_coll.c =================================================================== --- mvapich/trunk/mpid/ch_smp/shmem_coll.c 2008-01-07 02:05:46 UTC (rev 1810) +++ mvapich/trunk/mpid/ch_smp/shmem_coll.c 2008-01-07 02:06:56 UTC (rev 1811) @@ -58,7 +58,7 @@ extern int shmem_coll_blocks; extern int shmem_coll_max_msg_size; - +int shmem_coll_num_comm; #undef FUNCNAME #define FUNCNAME MPIDI_CH3I_SHMEM_COLL_Init #undef FCNAME @@ -70,6 +70,7 @@ volatile char tmpchar; #endif + shmem_coll_num_comm = shmem_coll_blocks; /* add pid for unique file name */ shmem_file = (char *) malloc(sizeof(char) * (SHMEM_COLL_HOSTNAME_LEN + 26 + PID_CHAR_LEN)); @@ -156,20 +157,59 @@ return -1; } shmem_coll = (shmem_coll_region *) shmem_coll_obj.mmap_ptr; + + + shmem_coll_obj.child_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); 
+ shmem_coll_obj.child_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.root_complete_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_gather = malloc(sizeof(int*)*shmem_coll_num_comm); + shmem_coll_obj.barrier_bcast = malloc(sizeof(int*)*shmem_coll_num_comm); + + shmem_coll_obj.child_complete_bcast[0] = (int*)((char*)(shmem_coll_obj.mmap_ptr) + sizeof(shmem_coll)); + shmem_coll_obj.root_complete_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.child_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 2*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.root_complete_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 3*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_gather[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 4*shmem_coll_num_comm*smpi.num_local_nodes); + shmem_coll_obj.barrier_bcast[0] = + (int*)(shmem_coll_obj.child_complete_bcast[0] + 5*shmem_coll_num_comm*smpi.num_local_nodes); + + for (j=1;jchild_complete_bcast[j][i] = 1; + for(j=0; j < shmem_coll_num_comm; j++){ + for (i = 0; i < smpi.num_local_nodes; i++){ + shmem_coll_obj.child_complete_bcast[j][i] = 1; } - for (i = 0; i < SHMEM_COLL_NUM_PROCS; i++){ - shmem_coll->root_complete_gather[j][i] = 1; + for (i = 0; i < smpi.num_local_nodes; i++){ + shmem_coll_obj.root_complete_gather[j][i] = 1; } } pthread_spin_init(&shmem_coll->shmem_coll_lock,0); } + shmem_coll_obj.shmem_coll_buf = + (char*)(shmem_coll_obj.child_complete_bcast[0] + 6*shmem_coll_num_comm*smpi.num_local_nodes); + return MPI_SUCCESS; } @@ -202,30 +242,30 @@ void MPID_SHMEM_COLL_GetShmemBuf(int size, int rank, int shmem_comm_rank, void** output_buf) { int i,myid; - char* shmem_coll_buf = (char*)(&(shmem_coll->shmem_coll_buf)); + char* shmem_coll_buf = shmem_coll_obj.shmem_coll_buf; myid = rank; if (myid == 0){ for (i=1; i < size; i++){ - while 
(shmem_coll->child_complete_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; } /* Set the completion flags back to zero */ for (i=1; i < size; i++){ - shmem_coll->child_complete_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][i] = 0; } *output_buf = (char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } else{ - while (shmem_coll->root_complete_gather[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); }; - shmem_coll->root_complete_gather[shmem_comm_rank][myid] = 0; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][myid] = 0; *output_buf = (char*)shmem_coll_buf + shmem_comm_rank*SHMEM_COLL_BLOCK_SIZE; } } @@ -289,11 +329,11 @@ if (myid == 0){ for (i=1; i < size; i++){ - shmem_coll->root_complete_gather[shmem_comm_rank][i] = 1; + shmem_coll_obj.root_complete_gather[shmem_comm_rank][i] = 1; } } else{ - shmem_coll->child_complete_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.child_complete_gather[shmem_comm_rank][myid] = 1; } } @@ -304,17 +344,17 @@ if (rank == 0){ for (i=1; i < size; i++){ - while (shmem_coll->barrier_gather[shmem_comm_rank][i] == 0) + while (shmem_coll_obj.barrier_gather[shmem_comm_rank][i] == 0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } } for (i=1; i < size; i++){ - shmem_coll->barrier_gather[shmem_comm_rank][i] = 0; + shmem_coll_obj.barrier_gather[shmem_comm_rank][i] = 0; } } else{ - shmem_coll->barrier_gather[shmem_comm_rank][myid] = 1; + shmem_coll_obj.barrier_gather[shmem_comm_rank][myid] = 1; } } @@ -325,15 +365,15 @@ if (rank == 0){ for (i=1; i < size; i++){ - shmem_coll->barrier_bcast[shmem_comm_rank][i] = 1; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][i] = 1; } } else{ - while (shmem_coll->barrier_bcast[shmem_comm_rank][myid] == 0) + while (shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] == 
0) { MPID_DeviceCheck(MPID_NOTBLOCKING); } - shmem_coll->barrier_bcast[shmem_comm_rank][myid] = 0; + shmem_coll_obj.barrier_bcast[shmem_comm_rank][myid] = 0; } MPID_DeviceCheck(MPID_NOTBLOCKING); } From mamidala at mvapich.cse.ohio-state.edu Mon Jan 7 15:50:56 2008 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Mon Jan 7 15:51:06 2008 Subject: [mvapich-commit] r1814 - mvapich2/trunk/src/mpi/coll Message-ID: <200801072050.m07Kounl005246@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2008-01-07 15:50:55 -0500 (Mon, 07 Jan 2008) New Revision: 1814 Modified: mvapich2/trunk/src/mpi/coll/reduce.c Log: checking in the MPI_Reduce problem with shmem collectives reported by Christian Modified: mvapich2/trunk/src/mpi/coll/reduce.c =================================================================== --- mvapich2/trunk/src/mpi/coll/reduce.c 2008-01-07 04:39:45 UTC (rev 1813) +++ mvapich2/trunk/src/mpi/coll/reduce.c 2008-01-07 20:50:55 UTC (rev 1814) @@ -729,14 +729,14 @@ MPI_Comm shmem_comm, leader_comm; MPID_Comm *shmem_commptr = 0, *leader_commptr = 0; int local_rank = -1, global_rank = -1, local_size=0, my_rank; - void* local_buf, *tmpbuf; + void* local_buf, *tmpbuf, *tmpbuf1; MPI_Aint true_lb, true_extent, extent; MPI_User_function *uop; int stride = 0, i, is_commutative, size; MPID_Op *op_ptr; MPI_Status status; int leader_root, total_size, shmem_comm_rank; - MPIU_CHKLMEM_DECL(1); + MPIU_CHKLMEM_DECL(2); #ifdef HAVE_CXX_BINDING int is_cxx_uop = 0; #endif @@ -918,6 +918,8 @@ global_rank = leader_commptr->rank; MPIU_CHKLMEM_MALLOC(tmpbuf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer"); tmpbuf = (void *)((char*)tmpbuf - true_lb); + MPIU_CHKLMEM_MALLOC(tmpbuf1, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer"); + tmpbuf1 = (void *)((char*)tmpbuf1 - true_lb); MPIR_Nest_incr(); mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, tmpbuf, count, datatype); @@ -953,7 +955,7 @@ 
leader_root = comm_ptr->leader_rank[leader_of_root]; if (local_size != total_size){ MPIR_Nest_incr(); - mpi_errno = MPIR_Reduce(tmpbuf, recvbuf, count, datatype, + mpi_errno = MPIR_Reduce(tmpbuf, tmpbuf1, count, datatype, op, leader_root, leader_commptr); MPIR_Nest_decr(); } @@ -975,6 +977,13 @@ MPIDI_CH3I_SHMEM_COLL_SetGatherComplete(local_size, local_rank, shmem_comm_rank); } + if ((local_rank == 0) && (root == my_rank)){ + MPIR_Nest_incr(); + mpi_errno = MPIR_Localcopy(tmpbuf1, count, datatype, recvbuf, + count, datatype); + MPIR_Nest_decr(); + goto fn_exit; + } /* Copying data from leader to the root incase leader is * not the root */ @@ -988,7 +997,7 @@ MPIR_REDUCE_TAG, comm ); } else{ - mpi_errno = MPIC_Send( recvbuf, count, datatype, root, + mpi_errno = MPIC_Send( tmpbuf1, count, datatype, root, MPIR_REDUCE_TAG, comm ); } } From curtisbr at mvapich.cse.ohio-state.edu Mon Jan 7 16:36:05 2008 From: curtisbr at mvapich.cse.ohio-state.edu (curtisbr@mvapich.cse.ohio-state.edu) Date: Mon Jan 7 16:36:14 2008 Subject: [mvapich-commit] r1815 - in mvapich2/branches/1.0: . 
src/mpi/coll src/mpid/osu_ch3/channels/mrail/include src/mpid/osu_ch3/channels/mrail/src/gen2 src/pmi/smpd Message-ID: <200801072136.m07La5ES005326@mvapich.cse.ohio-state.edu> Author: curtisbr Date: 2008-01-07 16:36:04 -0500 (Mon, 07 Jan 2008) New Revision: 1815 Modified: mvapich2/branches/1.0/CHANGELOG mvapich2/branches/1.0/src/mpi/coll/reduce.c mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_impl.h mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_rndv.c mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c mvapich2/branches/1.0/src/pmi/smpd/smpd_pmi.c Log: sync trunk <-> 1.0 Modified: mvapich2/branches/1.0/CHANGELOG =================================================================== --- mvapich2/branches/1.0/CHANGELOG 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/CHANGELOG 2008-01-07 21:36:04 UTC (rev 1815) @@ -9,7 +9,7 @@ 10/15/2007 * Fixing a bug that miscalculates the receive size in case of complex -datatype is used. +datatype is used. Thanks for Patrice Martinez from Bull for reporting this problem. 
10/10/2007 Modified: mvapich2/branches/1.0/src/mpi/coll/reduce.c =================================================================== --- mvapich2/branches/1.0/src/mpi/coll/reduce.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpi/coll/reduce.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -729,14 +729,14 @@ MPI_Comm shmem_comm, leader_comm; MPID_Comm *shmem_commptr = 0, *leader_commptr = 0; int local_rank = -1, global_rank = -1, local_size=0, my_rank; - void* local_buf, *tmpbuf; + void* local_buf, *tmpbuf, *tmpbuf1; MPI_Aint true_lb, true_extent, extent; MPI_User_function *uop; int stride = 0, i, is_commutative, size; MPID_Op *op_ptr; MPI_Status status; int leader_root, total_size, shmem_comm_rank; - MPIU_CHKLMEM_DECL(1); + MPIU_CHKLMEM_DECL(2); #ifdef HAVE_CXX_BINDING int is_cxx_uop = 0; #endif @@ -918,6 +918,8 @@ global_rank = leader_commptr->rank; MPIU_CHKLMEM_MALLOC(tmpbuf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer"); tmpbuf = (void *)((char*)tmpbuf - true_lb); + MPIU_CHKLMEM_MALLOC(tmpbuf1, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer"); + tmpbuf1 = (void *)((char*)tmpbuf1 - true_lb); MPIR_Nest_incr(); mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, tmpbuf, count, datatype); @@ -953,7 +955,7 @@ leader_root = comm_ptr->leader_rank[leader_of_root]; if (local_size != total_size){ MPIR_Nest_incr(); - mpi_errno = MPIR_Reduce(tmpbuf, recvbuf, count, datatype, + mpi_errno = MPIR_Reduce(tmpbuf, tmpbuf1, count, datatype, op, leader_root, leader_commptr); MPIR_Nest_decr(); } @@ -975,6 +977,13 @@ MPIDI_CH3I_SHMEM_COLL_SetGatherComplete(local_size, local_rank, shmem_comm_rank); } + if ((local_rank == 0) && (root == my_rank)){ + MPIR_Nest_incr(); + mpi_errno = MPIR_Localcopy(tmpbuf1, count, datatype, recvbuf, + count, datatype); + MPIR_Nest_decr(); + goto fn_exit; + } /* Copying data from leader to the root incase leader is * not the root */ @@ -988,7 +997,7 @@ MPIR_REDUCE_TAG, comm ); } else{ - 
mpi_errno = MPIC_Send( recvbuf, count, datatype, root, + mpi_errno = MPIC_Send( tmpbuf1, count, datatype, root, MPIR_REDUCE_TAG, comm ); } } Modified: mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_impl.h =================================================================== --- mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_impl.h 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_impl.h 2008-01-07 21:36:04 UTC (rev 1815) @@ -432,7 +432,7 @@ int MPIDI_CH3I_SMP_writev_rndv_data_cont(MPIDI_VC_t * vc, const MPID_IOV * iov, const int n, int *num_bytes_ptr); - + int MPIDI_CH3I_SMP_writev_rndv_data(MPIDI_VC_t * vc, const MPID_IOV * iov, const int n, int *num_bytes_ptr); Modified: mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c =================================================================== --- mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -800,7 +800,7 @@ if(MPIDI_CH3I_RDMA_Process.has_apm) { DEBUG_PRINT("Path Migration Failed\n"); } -#endif /* DEBUG */ +#endif /* ifdef DEBUG */ ibv_va_error_abort(GEN_EXIT_ERR, "Got FATAL event %d\n", event.event_type); break; Modified: mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_rndv.c =================================================================== --- mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_rndv.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_rndv.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -688,7 +688,6 @@ } /* Get the sorted indices for the given array */ - void get_sorted_index(MPIDI_VC_t *vc, int *b) { int *taken; Modified: 
mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c =================================================================== --- mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -713,7 +713,7 @@ DEBUG_PRINT("[eager send]vbuf addr %p, buffer: %p\n", v, v->buffer); *num_bytes_ptr = MRAILI_Fill_start_buffer(v, iov, n_iov); - + #ifdef CKPT /* this won't work properly at the moment... * Modified: mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c =================================================================== --- mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -283,7 +283,6 @@ v->content_size = 0; v->eager = 0; - if (MPIDI_CH3I_RDMA_Process.has_srq #ifdef CKPT || 1 Modified: mvapich2/branches/1.0/src/pmi/smpd/smpd_pmi.c =================================================================== --- mvapich2/branches/1.0/src/pmi/smpd/smpd_pmi.c 2008-01-07 20:50:55 UTC (rev 1814) +++ mvapich2/branches/1.0/src/pmi/smpd/smpd_pmi.c 2008-01-07 21:36:04 UTC (rev 1815) @@ -98,7 +98,7 @@ fn.PMI_KVS_Iter_next = (int (*)(const char [], char [], int, char [], int))PMIGetProcAddress(hModule, "PMI_KVS_Iter_next"); fn.PMI_Spawn_multiple = (int (*)(int, const char *[], const char **[], const int [], const int [], const PMI_keyval_t *[], int, const PMI_keyval_t [], int []))PMIGetProcAddress(hModule, "PMI_Spawn_multiple"); fn.PMI_Parse_option = (int (*)(int, char *[], int *, PMI_keyval_t **, int *))PMIGetProcAddress(hModule, "PMI_Parse_option"); - fn.PMI_Args_to_keyval = (int (*)(int *, char *((*)[]), PMI_keyval_t **, int *))PMIGetProcAddress(hModule, "PMI_Args_to_keyval"); + fn.PMI_Args_to_keyval = (int (*)(int *, 
char ***, PMI_keyval_t **, int *))PMIGetProcAddress(hModule, "PMI_Args_to_keyval"); fn.PMI_Free_keyvals = (int (*)(PMI_keyval_t [], int))PMIGetProcAddress(hModule, "PMI_Free_keyvals"); fn.PMI_Publish_name = (int (*)(const char [], const char [] ))PMIGetProcAddress(hModule, "PMI_Publish_name"); fn.PMI_Unpublish_name = (int (*)( const char [] ))PMIGetProcAddress(hModule, "PMI_Unpublish_name"); @@ -329,7 +329,7 @@ return fn.PMI_Parse_option(num_args, args, num_parsed, keyvalp, size); } -int PMI_Args_to_keyval(int *argcp, char *((*argvp)[]), PMI_keyval_t **keyvalp, int *size) +int PMI_Args_to_keyval(int *argcp, char ***argvp, PMI_keyval_t **keyvalp, int *size) { if (fn.PMI_Args_to_keyval == NULL) return PMI_FAIL; From curtisbr at mvapich.cse.ohio-state.edu Mon Jan 7 16:36:31 2008 From: curtisbr at mvapich.cse.ohio-state.edu (curtisbr@mvapich.cse.ohio-state.edu) Date: Mon Jan 7 16:36:39 2008 Subject: [mvapich-commit] r1816 - in mvapich2/trunk: . src/binding/f77 src/mpid/osu_ch3/channels/mrail/src/rdma Message-ID: <200801072136.m07LaVgT005336@mvapich.cse.ohio-state.edu> Author: curtisbr Date: 2008-01-07 16:36:31 -0500 (Mon, 07 Jan 2008) New Revision: 1816 Modified: mvapich2/trunk/make.mvapich2.vapi mvapich2/trunk/src/binding/f77/mpi_fortimpl.h mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c Log: sync trunk <-> 1.0 Modified: mvapich2/trunk/make.mvapich2.vapi =================================================================== --- mvapich2/trunk/make.mvapich2.vapi 2008-01-07 21:36:04 UTC (rev 1815) +++ mvapich2/trunk/make.mvapich2.vapi 2008-01-07 21:36:31 UTC (rev 1816) @@ -116,7 +116,7 @@ export LD_LIBRARY_PATH=$MTHOME_LIB:$LD_LIBRARY_PATH export LIBS=${LIBS:--L${MTHOME_LIB} -lmtl_common -lvapi -lpthread -lmosal -lmpga $SUPPRESS} export FFLAGS=${FFLAGS:--L${MTHOME_LIB}} -export CFLAGS=${CFLAGS:--D${ARCH} -DONE_SIDED -DUSE_INLINE -DRDMA_FAST_PATH -DUSE_HEADER_CACHING -D_SMP_ -D${IO_BUS} -D${LINKS} -DMPIDI_CH3_CHANNEL_RNDV 
-DMPID_USE_SEQUENCE_NUMBERS -D${VCLUSTER} ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG $SUPPRESS} +export CFLAGS=${CFLAGS:--D${ARCH} -DONE_SIDED -DUSE_INLINE -DRDMA_FAST_PATH -DUSE_HEADER_CACHING -DLAZY_MEM_UNREGISTER -D_SMP_ -D${IO_BUS} -D${LINKS} -DMPIDI_CH3_CHANNEL_RNDV -DMPID_USE_SEQUENCE_NUMBERS -D${VCLUSTER} ${HAVE_MPD_RING} -I${MTHOME}/include $OPT_FLAG $SUPPRESS} # Prelogue make distclean &>/dev/null Modified: mvapich2/trunk/src/binding/f77/mpi_fortimpl.h =================================================================== --- mvapich2/trunk/src/binding/f77/mpi_fortimpl.h 2008-01-07 21:36:04 UTC (rev 1815) +++ mvapich2/trunk/src/binding/f77/mpi_fortimpl.h 2008-01-07 21:36:31 UTC (rev 1816) @@ -215,6 +215,6 @@ /* A special case to help out when ROMIO is disabled */ #ifndef MPI_MODE_RDONLY #ifndef MPI_File_f2c -#define MPI_File_f2c(a) ((MPI_File*)(a)) +#define MPI_File_f2c(a) (*(MPI_File*)(&(a))) #endif #endif /* MPI_MODE_RDONLY */ Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2008-01-07 21:36:04 UTC (rev 1815) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2008-01-07 21:36:31 UTC (rev 1816) @@ -22,8 +22,7 @@ #include "pmi.h" #ifdef _SMP_ -static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t *, - MPID_Request *); +static int MPIDI_CH3_SMP_Rendezvous_push(MPIDI_VC_t *, MPID_Request *); #endif MPIDI_VC_t *flowlist; From koop at mvapich.cse.ohio-state.edu Tue Jan 8 18:46:46 2008 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Tue Jan 8 18:46:52 2008 Subject: [mvapich-commit] r1818 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801082346.m08NkkbM008439@mvapich.cse.ohio-state.edu> Author: koop Date: 2008-01-08 18:46:44 -0500 (Tue, 08 Jan 2008) New Revision: 1818 Modified: mvapich/trunk/mpid/ch_gen2/viasend.c Log: * Fix 
potential issue with ASYNC (very unlikely) Modified: mvapich/trunk/mpid/ch_gen2/viasend.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/viasend.c 2008-01-08 17:49:04 UTC (rev 1817) +++ mvapich/trunk/mpid/ch_gen2/viasend.c 2008-01-08 23:46:44 UTC (rev 1818) @@ -230,16 +230,9 @@ shandle->protocol = proto; packet->protocol = proto; - /* prepare descriptor and post */ - vbuf_init_send(v, sizeof(viadev_packet_rendezvous_start)); - - viadev_post_send(c, v); - - /* we can post the vbuf before updating the send handle. - * If we go to a multithreaded implementation, in which - * another thread may process the completed vbuf, we will - * need to update the shandle before posting the send - * to avoid a race condition + /* we need to do this before posting the send since in an extreme + * case for ASYNC we may get a completion for this shandle + * before setting these fields */ shandle->local_address = buf; @@ -249,6 +242,13 @@ shandle->connection = c; shandle->is_complete = 0; + /* prepare descriptor and post */ + vbuf_init_send(v, sizeof(viadev_packet_rendezvous_start)); + + viadev_post_send(c, v); + + + /* fill in when we get the reply: * receive_id, remote_address, memory_handle */ From koop at mvapich.cse.ohio-state.edu Tue Jan 8 18:48:22 2008 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Tue Jan 8 18:48:28 2008 Subject: [mvapich-commit] r1819 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801082348.m08NmM0a008449@mvapich.cse.ohio-state.edu> Author: koop Date: 2008-01-08 18:48:21 -0500 (Tue, 08 Jan 2008) New Revision: 1819 Modified: mvapich/trunk/mpid/ch_gen2/async_progress.c Log: * adding copyright header Modified: mvapich/trunk/mpid/ch_gen2/async_progress.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/async_progress.c 2008-01-08 23:46:44 UTC (rev 1818) +++ mvapich/trunk/mpid/ch_gen2/async_progress.c 2008-01-08 23:48:21 UTC (rev 
1819) @@ -1,3 +1,15 @@ +/* Copyright (c) 2002-2007, The Ohio State University. All rights + * reserved. + * + * This file is part of the MVAPICH software package developed by the + * team members of The Ohio State University's Network-Based Computing + * Laboratory (NBCL), headed by Professor Dhabaleswar K. (DK) Panda. + * + * For detailed copyright and licensing information, please refer to the + * copyright file COPYRIGHT_MVAPICH in the top level MPICH directory. + * + */ + #include "mpid_bind.h" #include "ibverbs_header.h" #include "viutil.h" From koop at mvapich.cse.ohio-state.edu Wed Jan 9 15:52:44 2008 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Wed Jan 9 15:52:54 2008 Subject: [mvapich-commit] r1824 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801092052.m09KqiX7011181@mvapich.cse.ohio-state.edu> Author: koop Date: 2008-01-09 15:52:43 -0500 (Wed, 09 Jan 2008) New Revision: 1824 Modified: mvapich/trunk/mpid/ch_gen2/viaparam.c Log: * Increase the default number of allowed outstanding requests for ASYNC Modified: mvapich/trunk/mpid/ch_gen2/viaparam.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/viaparam.c 2008-01-09 20:41:40 UTC (rev 1823) +++ mvapich/trunk/mpid/ch_gen2/viaparam.c 2008-01-09 20:52:43 UTC (rev 1824) @@ -111,7 +111,7 @@ int viadev_async_srq_pool_size = 64; /* The number of RDMA Reads are anyways limited to 4 */ -int viadev_async_sq_size = 8; +int viadev_async_sq_size = 16; /* if the reg cache is turned off, use R3 for messages below this size From kumarra at mvapich.cse.ohio-state.edu Wed Jan 9 16:15:16 2008 From: kumarra at mvapich.cse.ohio-state.edu (kumarra@mvapich.cse.ohio-state.edu) Date: Wed Jan 9 16:15:22 2008 Subject: [mvapich-commit] r1825 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801092115.m09LFG7S011303@mvapich.cse.ohio-state.edu> Author: kumarra Date: 2008-01-09 16:15:14 -0500 (Wed, 09 Jan 2008) New Revision: 1825 Modified: 
mvapich/trunk/mpid/ch_gen2/viainit.c Log: solves the low bandwidth of async due to affinity Modified: mvapich/trunk/mpid/ch_gen2/viainit.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/viainit.c 2008-01-09 20:52:43 UTC (rev 1824) +++ mvapich/trunk/mpid/ch_gen2/viainit.c 2008-01-09 21:15:14 UTC (rev 1825) @@ -686,8 +686,6 @@ free(remote_host_info); } - viainit_smpi_init(allhostids); - free(allhostids); /* set default parameters */ viadev_set_default_parameters(viadev.np, viadev.me, is_homogeneous, hca_type); @@ -696,6 +694,9 @@ * environment variables */ viadev_init_parameters(viadev.np, viadev.me); + viainit_smpi_init(allhostids); + free(allhostids); + viadev.num_connections = viadev_use_on_demand ? 0 : viadev.np; viadev.maxtransfersize = viadev_max_rdma_size; From curtisbr at mvapich.cse.ohio-state.edu Thu Jan 10 11:40:43 2008 From: curtisbr at mvapich.cse.ohio-state.edu (curtisbr@mvapich.cse.ohio-state.edu) Date: Thu Jan 10 11:40:51 2008 Subject: [mvapich-commit] r1837 - mvapich2/branches/1.0 Message-ID: <200801101640.m0AGehdU018147@mvapich.cse.ohio-state.edu> Author: curtisbr Date: 2008-01-10 11:40:43 -0500 (Thu, 10 Jan 2008) New Revision: 1837 Modified: mvapich2/branches/1.0/README Log: Update url to user guide. Modified: mvapich2/branches/1.0/README =================================================================== --- mvapich2/branches/1.0/README 2008-01-10 16:38:07 UTC (rev 1836) +++ mvapich2/branches/1.0/README 2008-01-10 16:40:43 UTC (rev 1837) @@ -17,7 +17,7 @@ For details on installation and usage instructions, please refer to Sections 4 and 5 of the MVAPICH2 user guide: -http://nowlab.cse.ohio-state.edu/projects/mpi-iba/download-mvapich2/mvapich2_user_guide.html +http://mvapich.cse.ohio-state.edu/support/user_guide_mvapich2.html A set of micro-benchmarks (for both point-to-point and one sided) are also available under the osu_benchmarks directory. 
From kumarra at mvapich.cse.ohio-state.edu Fri Jan 11 13:55:37 2008 From: kumarra at mvapich.cse.ohio-state.edu (kumarra@mvapich.cse.ohio-state.edu) Date: Fri Jan 11 13:55:56 2008 Subject: [mvapich-commit] r1839 - mvapich/trunk/mpid/ch_gen2 Message-ID: <200801111855.m0BItbum021296@mvapich.cse.ohio-state.edu> Author: kumarra Date: 2008-01-11 13:55:35 -0500 (Fri, 11 Jan 2008) New Revision: 1839 Modified: mvapich/trunk/mpid/ch_gen2/async_progress.c mvapich/trunk/mpid/ch_gen2/viapriv.h Log: Call Unpacking for derived datatypes before setting the is_complete flag. This is required for async progress Modified: mvapich/trunk/mpid/ch_gen2/async_progress.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/async_progress.c 2008-01-10 21:07:11 UTC (rev 1838) +++ mvapich/trunk/mpid/ch_gen2/async_progress.c 2008-01-11 18:55:35 UTC (rev 1839) @@ -418,8 +418,7 @@ r = (MPIR_RHANDLE *) v->shandle; if (r != NULL) { - //RECV_COMPLETE(r); - r->is_complete = 1; + RECV_COMPLETE(r); } } #ifdef ADAPTIVE_RDMA_FAST_PATH Modified: mvapich/trunk/mpid/ch_gen2/viapriv.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/viapriv.h 2008-01-10 21:07:11 UTC (rev 1838) +++ mvapich/trunk/mpid/ch_gen2/viapriv.h 2008-01-11 18:55:35 UTC (rev 1839) @@ -787,7 +787,6 @@ if(VIADEV_PROTOCOL_ASYNC == viadev_rndv_protocol) { \ ASYNC_THREAD_LOCK; \ } \ - r->is_complete = 1; \ r->s.MPI_ERROR = MPI_SUCCESS; \ r->s.count = r->len; \ if (r->dreg_entry != NULL) { \ @@ -813,6 +812,7 @@ error_abort_all(GEN_EXIT_ERR, "RECV_COMPLETE invalid type\n"); \ } \ } \ + r->is_complete = 1; \ if(VIADEV_PROTOCOL_ASYNC == viadev_rndv_protocol) { \ ASYNC_THREAD_UNLOCK; \ } \ From perkinjo at mvapich.cse.ohio-state.edu Fri Jan 11 16:50:13 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Fri Jan 11 16:50:21 2008 Subject: [mvapich-commit] r1840 - mvapich/trunk/mpid/ch_gen2/process Message-ID: 
<200801112150.m0BLoDJl021541@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-11 16:50:11 -0500 (Fri, 11 Jan 2008) New Revision: 1840 Modified: mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_common.h Log: Fix for ia64 network byte order bug. Modified: mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_common.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_common.h 2008-01-11 18:55:35 UTC (rev 1839) +++ mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_common.h 2008-01-11 21:50:11 UTC (rev 1840) @@ -20,13 +20,6 @@ #ifndef _PMGR_COLLECTIVE_COMMON_H #define _PMGR_COLLECTIVE_COMMON_H -#if defined(_IA64_) -#undef htons -#undef ntohs -#define htons(__bsx) ((((__bsx) >> 8) & 0xff) | (((__bsx) & 0xff) << 8)) -#define ntohs(__bsx) ((((__bsx) >> 8) & 0xff) | (((__bsx) & 0xff) << 8)) -#endif - /* PMGR_VERSION for pmgr_collective is PMGR_COLLECTIVE (== 8) */ #define PMGR_COLLECTIVE 8 From perkinjo at mvapich.cse.ohio-state.edu Fri Jan 11 16:53:25 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Fri Jan 11 16:53:32 2008 Subject: [mvapich-commit] r1842 - in mvapich/trunk/mpid/ch_gen2: . process Message-ID: <200801112153.m0BLrPCj021562@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-11 16:53:25 -0500 (Fri, 11 Jan 2008) New Revision: 1842 Modified: mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_client.c mvapich/trunk/mpid/ch_gen2/viainit.c mvapich/trunk/mpid/ch_gen2/viaparam.c Log: Check for NULL HCA Context. Also, close pmgr collectives and exit from pmgr_abort when necessary. 
Modified: mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_client.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_client.c 2008-01-11 21:51:48 UTC (rev 1841) +++ mvapich/trunk/mpid/ch_gen2/process/pmgr_collective_client.c 2008-01-11 21:53:25 UTC (rev 1842) @@ -71,6 +71,7 @@ int mpirun_port; int mpirun_socket; int pmgr_me, pmgr_nprocs, pmgr_id; +static int pmgr_close_on_abort = 0; /* tree data structures */ int pmgr_parent; /* MPI rank of parent */ @@ -573,6 +574,8 @@ /* open up socket tree, if enabled */ if (mpirun_use_trees) pmgr_open_tree(); + pmgr_close_on_abort = 1; + return PMGR_SUCCESS; } @@ -787,5 +790,11 @@ close(s); - return PMGR_SUCCESS; + if(pmgr_close_on_abort) { + pmgr_close(); + } + + exit(EXIT_FAILURE); + + return !PMGR_SUCCESS; } Modified: mvapich/trunk/mpid/ch_gen2/viainit.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/viainit.c 2008-01-11 21:51:48 UTC (rev 1841) +++ mvapich/trunk/mpid/ch_gen2/viainit.c 2008-01-11 21:53:25 UTC (rev 1842) @@ -138,8 +138,8 @@ static void check_attrs(void) { if(viadev.port_attr.active_mtu < viadev_default_mtu) { - fprintf(stderr, - "Active MTU is %d, VIADEV_DEFAULT_MTU set to %d\n. See User Guide", + fprintf(stderr, "Active MTU is %d, VIADEV_DEFAULT_MTU set to %d\n. 
" + "See User Guide\n", viadev.port_attr.active_mtu, viadev_default_mtu); } @@ -180,7 +180,6 @@ "Max VIADEV_CQ_SIZE is %d, set to %d\n", viadev.dev_attr.max_cqe, (int) viadev_cq_size); } - } static void open_ib_port(void) @@ -661,6 +660,10 @@ open_ib_port(); hca_type = get_hca_type(viadev.nic, viadev.context); + if(hca_type == HCA_ERROR) { + error_abort_all(GEN_EXIT_ERR, "HCA Error!\n"); + } + { typedef struct { int host_id; Modified: mvapich/trunk/mpid/ch_gen2/viaparam.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/viaparam.c 2008-01-11 21:51:48 UTC (rev 1841) +++ mvapich/trunk/mpid/ch_gen2/viaparam.c 2008-01-11 21:53:25 UTC (rev 1842) @@ -1082,6 +1082,10 @@ memset(&dev_attr, 0, sizeof(struct ibv_device_attr)); + if(!ctx) { + return HCA_ERROR; + } + ret = ibv_query_device(ctx, &dev_attr); if(ret) { From perkinjo at mvapich.cse.ohio-state.edu Fri Jan 11 16:51:48 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Fri Jan 11 16:57:24 2008 Subject: [mvapich-commit] r1841 - mvapich/trunk/mpid/ch_gen2/process Message-ID: <200801112151.m0BLpmmi021551@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-11 16:51:48 -0500 (Fri, 11 Jan 2008) New Revision: 1841 Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c Log: -show and -xterm no longer fall back to the legacy startup. Also added a space for abort error messages. 
Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2008-01-11 21:50:11 UTC (rev 1840) +++ mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2008-01-11 21:51:48 UTC (rev 1841) @@ -229,7 +229,6 @@ break; case 2: xterm_on = 1; - legacy_startup = 1; break; case 3: hostfile_on = 1; @@ -246,7 +245,6 @@ break; case 5: show_on = 1; - legacy_startup = 1; break; case 6: use_rsh = 1; @@ -491,6 +489,9 @@ else { if(pglist && !legacy_startup) { spawn_fast(argc, argv, totalview_cmd, env); + + if(show_on) exit(EXIT_SUCCESS); + mpispawn_checkin(s, (struct sockaddr *)&sockaddr, sockaddr_len); } @@ -760,7 +761,7 @@ else { char wfe_abort_message[wfe_abort_msglen]; - fprintf(stderr, "Abort signaled from %s (rank %d):", + fprintf(stderr, "Abort signaled from %s (rank %d): ", plist[wfe_abort_rank].hostname, wfe_abort_rank); if(!read_socket(wfe_socket, &wfe_abort_message, wfe_abort_msglen)) { @@ -1799,8 +1800,10 @@ } for(i = 0; i < pglist->npgs; i++) { + size_t arg_offset = 0; + if(!(pglist->index[i]->pid = fork())) { - const char* argv[5]; + const char* argv[6]; char *command; tmp = mkstr("%s MPISPAWN_ID=%d", mpispawn_env, i); @@ -1879,30 +1882,44 @@ } } - command = mkstr("cd %s; %s %s %s %s/mpispawn", wd, ENV_CMD, - mpispawn_env, env, binary_dirname); + if(use_rsh) { + argv[0] = RSH_CMD; + } - if(!command) { - fprintf(stderr, - "Could not allocate string for remote command!\n"); - exit(EXIT_FAILURE); + else { + argv[arg_offset] = SSH_CMD; + argv[++arg_offset] = SSH_ARG; } - if(use_rsh) { - argv[0] = RSH_CMD; - argv[1] = pglist->index[i]->hostname; - argv[2] = command; - argv[3] = NULL; + if(xterm_on) { + argv[++arg_offset] = "-X"; + command = mkstr("cd %s; %s %s %s %s -e %s/mpispawn", wd, + ENV_CMD, mpispawn_env, env, XTERM, binary_dirname); } else { - argv[0] = SSH_CMD; - argv[1] = SSH_ARG; - argv[2] = pglist->index[i]->hostname; - argv[3] = command; - argv[4] = 
NULL; + command = mkstr("cd %s; %s %s %s %s/mpispawn", wd, ENV_CMD, + mpispawn_env, env, binary_dirname); } + if(!command) { + fprintf(stderr, "Couldn't allocate string for remote command!\n"); + exit(EXIT_FAILURE); + } + + argv[arg_offset + 1] = pglist->index[i]->hostname; + argv[arg_offset + 2] = command; + argv[arg_offset + 3] = NULL; + + if(show_on) { + size_t arg = 0; + fprintf(stdout, "\n"); + while(argv[arg] != NULL) fprintf(stdout, "%s ", argv[arg++]); + fprintf(stdout, "\n"); + + exit(EXIT_SUCCESS); + } + execv(argv[0], (char* const*) argv); perror("execv"); From koop at mvapich.cse.ohio-state.edu Mon Jan 14 08:46:27 2008 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Mon Jan 14 08:46:34 2008 Subject: [mvapich-commit] r1843 - mvapich/trunk Message-ID: <200801141346.m0EDkRJf005031@mvapich.cse.ohio-state.edu> Author: koop Date: 2008-01-14 08:46:25 -0500 (Mon, 14 Jan 2008) New Revision: 1843 Modified: mvapich/trunk/mvapich.conf Log: * Fixing parameter name Modified: mvapich/trunk/mvapich.conf =================================================================== --- mvapich/trunk/mvapich.conf 2008-01-11 21:53:25 UTC (rev 1842) +++ mvapich/trunk/mvapich.conf 2008-01-14 13:46:25 UTC (rev 1843) @@ -234,7 +234,7 @@ # VIADEV_VBUF_TOTAL_SIZE. This has no effect if message coalescing is # turned off. 
#----------------------------------------------------------------------------------------- -# VIADEV_COALESCE_SAME=0 +# VIADEV_USE_COALESCE_SAME=0 # # Default value: 0 # If VIADEV_USE_COALESCE is enabled, this flag will enable coalescing From perkinjo at mvapich.cse.ohio-state.edu Mon Jan 14 16:13:16 2008 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Mon Jan 14 16:13:24 2008 Subject: [mvapich-commit] r1847 - mvapich2/tags Message-ID: <200801142113.m0ELDGC0005836@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2008-01-14 16:13:14 -0500 (Mon, 14 Jan 2008) New Revision: 1847 Added: mvapich2/tags/1.0.1/ Log: Creating tag for 1.0.1 release. Copied: mvapich2/tags/1.0.1 (from rev 1624, mvapich2/branches/1.0) From curtisbr at mvapich.cse.ohio-state.edu Wed Jan 16 14:22:13 2008 From: curtisbr at mvapich.cse.ohio-state.edu (curtisbr@mvapich.cse.ohio-state.edu) Date: Wed Jan 16 14:22:22 2008 Subject: [mvapich-commit] r1863 - mvapich2/trunk Message-ID: <200801161922.m0GJMDG0011720@mvapich.cse.ohio-state.edu> Author: curtisbr Date: 2008-01-16 14:22:10 -0500 (Wed, 16 Jan 2008) New Revision: 1863 Modified: mvapich2/trunk/README Log: Update url of user guide. Modified: mvapich2/trunk/README =================================================================== --- mvapich2/trunk/README 2008-01-16 17:06:55 UTC (rev 1862) +++ mvapich2/trunk/README 2008-01-16 19:22:10 UTC (rev 1863) @@ -17,7 +17,7 @@ For details on installation and usage instructions, please refer to Sections 4 and 5 of the MVAPICH2 user guide: -http://nowlab.cse.ohio-state.edu/projects/mpi-iba/download-mvapich2/mvapich2_user_guide.html +http://mvapich.cse.ohio-state.edu/support/user_guide_mvapich2.html A set of micro-benchmarks (for both point-to-point and one sided) are also available under the osu_benchmarks directory. 
From chail at mvapich.cse.ohio-state.edu Wed Jan 16 18:33:25 2008 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Wed Jan 16 18:33:35 2008 Subject: [mvapich-commit] r1868 - in mvapich/trunk: . mpid/ch_gen2 mpid/ch_smp Message-ID: <200801162333.m0GNXPEe012179@mvapich.cse.ohio-state.edu> Author: chail Date: 2008-01-16 18:33:24 -0500 (Wed, 16 Jan 2008) New Revision: 1868 Modified: mvapich/trunk/make.mvapich.def mvapich/trunk/make.mvapich.def.smp mvapich/trunk/make.mvapich.gen2 mvapich/trunk/make.mvapich.smp mvapich/trunk/mpid/ch_gen2/mpid_smpi.h mvapich/trunk/mpid/ch_smp/mpid_smpi.h Log: Add auto-detection for AMD Barcelona architecture and apply optimized parameters for it. Modified: mvapich/trunk/make.mvapich.def =================================================================== --- mvapich/trunk/make.mvapich.def 2008-01-16 23:16:09 UTC (rev 1867) +++ mvapich/trunk/make.mvapich.def 2008-01-16 23:33:24 UTC (rev 1868) @@ -23,6 +23,9 @@ ARCH=_EM64T_ elif [ -z "`grep 'model name' /dev/null Modified: mvapich/trunk/mpid/ch_gen2/mpid_smpi.h =================================================================== --- mvapich/trunk/mpid/ch_gen2/mpid_smpi.h 2008-01-16 23:16:09 UTC (rev 1867) +++ mvapich/trunk/mpid/ch_gen2/mpid_smpi.h 2008-01-16 23:33:24 UTC (rev 1868) @@ -44,6 +44,12 @@ #define SMP_BATCH_SIZE (8) #define SMP_NUM_SEND_BUFFER (128) #define SMP_SEND_BUF_SIZE (8192) +#elif defined(_X86_64_) && defined(_AMD_QUAD_CORE_) + #define SMP_EAGERSIZE (32) /* 32 Kbytes */ + #define SMPI_LENGTH_QUEUE (128) + #define SMP_BATCH_SIZE (32) + #define SMP_NUM_SEND_BUFFER (128) + #define SMP_SEND_BUF_SIZE (8192) #elif defined(_X86_64_) #define SMP_EAGERSIZE (8) /* 8 Kbytes */ #define SMPI_LENGTH_QUEUE (32) Modified: mvapich/trunk/mpid/ch_smp/mpid_smpi.h =================================================================== --- mvapich/trunk/mpid/ch_smp/mpid_smpi.h 2008-01-16 23:16:09 UTC (rev 1867) +++ mvapich/trunk/mpid/ch_smp/mpid_smpi.h 2008-01-16 
23:33:24 UTC (rev 1868) @@ -43,6 +43,12 @@ #define SMP_BATCH_SIZE (8) #define SMP_NUM_SEND_BUFFER (128) #define SMP_SEND_BUF_SIZE (8192) +#elif defined(_X86_64_) && defined(_AMD_QUAD_CORE_) + #define SMP_EAGERSIZE (32) /* 32 Kbytes */ + #define SMPI_LENGTH_QUEUE (128) + #define SMP_BATCH_SIZE (32) + #define SMP_NUM_SEND_BUFFER (128) + #define SMP_SEND_BUF_SIZE (8192) #elif defined(_X86_64_) #define SMP_EAGERSIZE (8) /* 8 Kbytes */ #define SMPI_LENGTH_QUEUE (32) From curtisbr at cse.ohio-state.edu Wed Jan 16 18:44:15 2008 From: curtisbr at cse.ohio-state.edu (Brian Curtis) Date: Wed Jan 16 18:44:23 2008 Subject: [mvapich-commit] r1868 - in mvapich/trunk: . mpid/ch_gen2 mpid/ch_smp In-Reply-To: <200801162333.m0GNXPEe012179@mvapich.cse.ohio-state.edu> References: <200801162333.m0GNXPEe012179@mvapich.cse.ohio-state.edu> Message-ID: <478E96CF.6020502@cse.ohio-state.edu> So the compilation must occur on a machine with the same processor as the production machine? I thought you might go with run-time CPU identification. Thinking from the OEM perspective, this is troublesome. Brian chail@mvapich.cse.ohio-state.edu wrote: > Author: chail > Date: 2008-01-16 18:33:24 -0500 (Wed, 16 Jan 2008) > New Revision: 1868 > > Modified: > mvapich/trunk/make.mvapich.def > mvapich/trunk/make.mvapich.def.smp > mvapich/trunk/make.mvapich.gen2 > mvapich/trunk/make.mvapich.smp > mvapich/trunk/mpid/ch_gen2/mpid_smpi.h > mvapich/trunk/mpid/ch_smp/mpid_smpi.h > Log: > Add auto-detection for AMD Barcelona architecture and > apply optimized parameters for it. 
> > > Modified: mvapich/trunk/make.mvapich.def > =================================================================== > --- mvapich/trunk/make.mvapich.def 2008-01-16 23:16:09 UTC (rev 1867) > +++ mvapich/trunk/make.mvapich.def 2008-01-16 23:33:24 UTC (rev 1868) > @@ -23,6 +23,9 @@ > ARCH=_EM64T_ > elif [ -z "`grep 'model name' ARCH=_X86_64_ > + if [ "`grep 'siblings' + PROCESSOR=_AMD_QUAD_CORE_ > + fi > else > ARCH=unknown > fi > > Modified: mvapich/trunk/make.mvapich.def.smp > =================================================================== > --- mvapich/trunk/make.mvapich.def.smp 2008-01-16 23:16:09 UTC (rev 1867) > +++ mvapich/trunk/make.mvapich.def.smp 2008-01-16 23:33:24 UTC (rev 1868) > @@ -23,6 +23,9 @@ > ARCH=_EM64T_ > elif [ -z "`grep 'model name' ARCH=_X86_64_ > + if [ "`grep 'siblings' + PROCESSOR=_AMD_QUAD_CORE_ > + fi > else > ARCH=unknown > fi > > Modified: mvapich/trunk/make.mvapich.gen2 > =================================================================== > --- mvapich/trunk/make.mvapich.gen2 2008-01-16 23:16:09 UTC (rev 1867) > +++ mvapich/trunk/make.mvapich.gen2 2008-01-16 23:33:24 UTC (rev 1868) > @@ -75,9 +75,15 @@ > # Set this to override automatic optimization setting (-03). > OPT_FLAG=${OPT_FLAG:--O3} > > +if [ -n "$PROCESSOR" ]; then > + PROCESSOR=-D${PROCESSOR} > +else > + PROCESSOR= > +fi > + > export LIBS=${LIBS:--L${IBHOME_LIB} -Wl,-rpath=${IBHOME_LIB} -libverbs -libumad -lpthread} > export FFLAGS=${FFLAGS:--L${IBHOME_LIB}} > -export CFLAGS=${CFLAGS:--D${ARCH} ${PTMALLOC} -DEARLY_SEND_COMPLETION -DMEMORY_SCALE -DVIADEV_RPUT_SUPPORT -D_SMP_ -D_SMP_RNDV_ -DCH_GEN2 -D_GNU_SOURCE ${COMPILER_FLAG} -I${IBHOME}/include $OPT_FLAG} > +export CFLAGS=${CFLAGS:--D${ARCH} ${PROCESSOR