From gaoq at mvapich.cse.ohio-state.edu Wed Aug 1 14:05:10 2007 From: gaoq at mvapich.cse.ohio-state.edu (gaoq@mvapich.cse.ohio-state.edu) Date: Wed Aug 1 14:05:38 2007 Subject: [mvapich-commit] r1450 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/include Message-ID: <200708011805.l71I5AdI016468@mvapich.cse.ohio-state.edu> Author: gaoq Date: 2007-08-01 14:05:08 -0400 (Wed, 01 Aug 2007) New Revision: 1450 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_pre.h Log: remove the duplicate volatile Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_pre.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_pre.h 2007-08-01 03:57:23 UTC (rev 1449) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/include/mpidi_ch3_pre.h 2007-08-01 18:05:08 UTC (rev 1450) @@ -44,7 +44,7 @@ #define MPIDI_DEV_IMPLEMENTS_KVS -typedef volatile enum MPIDI_CH3I_VC_state +typedef enum MPIDI_CH3I_VC_state { MPIDI_CH3I_VC_STATE_INVALID, MPIDI_CH3I_VC_STATE_UNCONNECTED, From mellanox at mvapich.cse.ohio-state.edu Sun Aug 5 04:38:34 2007 From: mellanox at mvapich.cse.ohio-state.edu (mellanox@mvapich.cse.ohio-state.edu) Date: Sun Aug 5 04:38:57 2007 Subject: [mvapich-commit] r1451 - mvapich/branches/0.9.9/mpid/ch_gen2/process Message-ID: <200708050838.l758cYHf016012@mvapich.cse.ohio-state.edu> Author: mellanox Date: 2007-08-05 04:38:33 -0400 (Sun, 05 Aug 2007) New Revision: 1451 Modified: mvapich/branches/0.9.9/mpid/ch_gen2/process/mpirun_rsh.c Log: Small fixes for mpirun_rsh - Check that we don't overflow the command line size - Better handling for accept. The patch is based on the NERSC patch that was part of the 0.9.7 Mellanox release.
Modified: mvapich/branches/0.9.9/mpid/ch_gen2/process/mpirun_rsh.c =================================================================== --- mvapich/branches/0.9.9/mpid/ch_gen2/process/mpirun_rsh.c 2007-08-01 18:05:08 UTC (rev 1450) +++ mvapich/branches/0.9.9/mpid/ch_gen2/process/mpirun_rsh.c 2007-08-05 08:38:33 UTC (rev 1451) @@ -523,7 +523,7 @@ sockaddr_len = sizeof(sockaddr); s1 = accept(s, (struct sockaddr *) &sockaddr, &sockaddr_len); if (s1 < 0) { - if (errno == EINTR) + if ((errno == EINTR) || (errno == EAGAIN)) goto ACCEPT_HID; perror("accept"); cleanup(); @@ -644,7 +644,7 @@ sockaddr_len = sizeof(sockaddr); s1 = accept(s, (struct sockaddr *) &sockaddr, &sockaddr_len); if (s1 < 0) { - if (errno == EINTR) + if ((errno == EINTR) || (errno == EAGAIN)) goto ACCEPT; perror("accept"); cleanup(); @@ -891,7 +891,7 @@ char *device_port_env = NULL; int id = getpid(); - int str_len; + int str_len, len; if (plist[i].device != NULL && strlen(plist[i].device) != 0){ device_port_env = (char * )malloc(BASE_ENV_LEN + strlen(plist[i].device) + 1); sprintf(device_port_env, "VIADEV_DEVICE=%s \0", plist[i].device); @@ -922,6 +922,10 @@ strlen(device_port_env) + 530; } + if ((ld_library_path = getenv( "LD_LIBRARY_PATH" ) ) != NULL ) { + str_len += strlen(ld_library_path); + } + if ((remote_command = malloc(str_len)) == NULL) { fprintf(stderr, "Failed to malloc %d bytes for remote_command\n", str_len); @@ -938,7 +942,7 @@ * this is the remote command we execute whether we were are using * an xterm or using rsh directly */ - if ((ld_library_path = getenv( "LD_LIBRARY_PATH" ) ) != NULL ) { + if (ld_library_path != NULL ) { sprintf(remote_command, "cd %s; %s LD_LIBRARY_PATH=%s:%s " "MPIRUN_MPD=0 MPIRUN_HOST=%s MPIRUN_PORT=%d " "MPIRUN_RANK=%d MPIRUN_NPROCS=%d MPIRUN_ID=%d %s %s %s", @@ -954,11 +958,16 @@ } if(use_totalview) { - sprintf(remote_command, "%s MPIRUN_PROCESSES='%s' %s ", remote_command, mpirun_processes, command_name); + len = sprintf(remote_command, "%s 
MPIRUN_PROCESSES='%s' %s ", remote_command, mpirun_processes, command_name); } else { - sprintf(remote_command, "%s NOT_USE_TOTALVIEW=1 %s ", remote_command, command_name); + len = sprintf(remote_command, "%s NOT_USE_TOTALVIEW=1 %s ", remote_command, command_name); } + if (len > str_len) { + fprintf(stderr, "Internal error - overflowed remote_command\n"); + exit(1); + } + if (xterm_on) { sprintf(xterm_command, "%s; echo process exited", remote_command); sprintf(xterm_title, "\"mpirun process %d of %d\"", i, nprocs); @@ -1022,7 +1031,15 @@ int nread,remote_id,local_id,s1,i,flag; +ACCEPT_WFE: s1 = accept(s,(struct sockaddr *) sockaddr,&sockaddr_len); + if (s1 < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + goto ACCEPT_WFE; + perror("accept"); + cleanup(); + } + nread = read(s1, &flag, sizeof(flag)); if (nread == -1) { perror("Termination socket read failed"); From perkinjo at mvapich.cse.ohio-state.edu Mon Aug 6 13:42:59 2007 From: perkinjo at mvapich.cse.ohio-state.edu (perkinjo@mvapich.cse.ohio-state.edu) Date: Mon Aug 6 13:43:23 2007 Subject: [mvapich-commit] r1453 - mvapich/trunk/mpid/ch_gen2/process Message-ID: <200708061742.l76Hgxo2019595@mvapich.cse.ohio-state.edu> Author: perkinjo Date: 2007-08-06 13:42:58 -0400 (Mon, 06 Aug 2007) New Revision: 1453 Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c Log: Adding patch from Pasha Modified: mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c =================================================================== --- mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2007-08-06 17:41:36 UTC (rev 1452) +++ mvapich/trunk/mpid/ch_gen2/process/mpirun_rsh.c 2007-08-06 17:42:58 UTC (rev 1453) @@ -575,7 +575,7 @@ alarm_msg = "Timeout during hostid exchange.\n"; if (s1 < 0) { - if (errno == EINTR) + if ((errno == EINTR) || (errno == EAGAIN)) goto ACCEPT_HID; perror("accept"); cleanup(); @@ -907,7 +907,7 @@ char *device_port_env = NULL; int id = getpid(); - int str_len; + int str_len, len; if (plist[i].device != NULL && 
strlen(plist[i].device) != 0){ device_port_env = (char * )malloc(BASE_ENV_LEN + strlen(plist[i].device) + 1); sprintf(device_port_env, "VIADEV_DEVICE=%s \0", plist[i].device); @@ -938,6 +938,10 @@ strlen(device_port_env) + 530; } + if ((ld_library_path = getenv( "LD_LIBRARY_PATH" ) ) != NULL ) { + str_len += strlen(ld_library_path); + } + if ((remote_command = malloc(str_len)) == NULL) { fprintf(stderr, "Failed to malloc %d bytes for remote_command\n", str_len); @@ -954,7 +958,7 @@ * this is the remote command we execute whether we were are using * an xterm or using rsh directly */ - if ((ld_library_path = getenv( "LD_LIBRARY_PATH" ) ) != NULL ) { + if (ld_library_path != NULL ) { sprintf(remote_command, "cd %s; %s LD_LIBRARY_PATH=%s:%s " "MPIRUN_MPD=0 MPIRUN_HOST=%s MPIRUN_PORT=%d " "MPIRUN_RANK=%d MPIRUN_NPROCS=%d MPIRUN_ID=%d %s %s %s", @@ -970,11 +974,16 @@ } if(use_totalview) { - sprintf(remote_command, "%s MPIRUN_PROCESSES='%s' %s ", remote_command, mpirun_processes, command_name); + len = sprintf(remote_command, "%s MPIRUN_PROCESSES='%s' %s ", remote_command, mpirun_processes, command_name); } else { - sprintf(remote_command, "%s NOT_USE_TOTALVIEW=1 %s ", remote_command, command_name); + len = sprintf(remote_command, "%s NOT_USE_TOTALVIEW=1 %s ", remote_command, command_name); } + if (len > str_len) { + fprintf(stderr, "Internal error - overflowed remote_command\n"); + exit(1); + } + if (xterm_on) { sprintf(xterm_command, "%s; echo process exited", remote_command); sprintf(xterm_title, "\"mpirun process %d of %d\"", i, nprocs); @@ -1038,7 +1047,15 @@ int nread,remote_id,local_id,s1,i,flag; +ACCEPT_WFE: s1 = accept(s,(struct sockaddr *) sockaddr,&sockaddr_len); + if (s1 < 0) { + if ((errno == EINTR) || (errno == EAGAIN)) + goto ACCEPT_WFE; + perror("accept"); + cleanup(); + } + nread = read(s1, &flag, sizeof(flag)); if (nread == -1) { perror("Termination socket read failed"); From mamidala at mvapich.cse.ohio-state.edu Mon Aug 6 15:38:11 2007 From: mamidala 
at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Mon Aug 6 15:38:38 2007 Subject: [mvapich-commit] r1454 - mvapich2/trunk/test/mpi/comm Message-ID: <200708061938.l76JcBwj019762@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2007-08-06 15:38:10 -0400 (Mon, 06 Aug 2007) New Revision: 1454 Modified: mvapich2/trunk/test/mpi/comm/cmfree.c Log: checking in modified parameter NCOMM Modified: mvapich2/trunk/test/mpi/comm/cmfree.c =================================================================== --- mvapich2/trunk/test/mpi/comm/cmfree.c 2007-08-06 17:42:58 UTC (rev 1453) +++ mvapich2/trunk/test/mpi/comm/cmfree.c 2007-08-06 19:38:10 UTC (rev 1454) @@ -12,7 +12,7 @@ static char MTEST_Descrip[] = "Test that communicators have reference count semantics"; #define NELM 128 -#define NCOMM 1020 +#define NCOMM 508 int main( int argc, char *argv[] ) { From huangwei at mvapich.cse.ohio-state.edu Tue Aug 7 21:16:34 2007 From: huangwei at mvapich.cse.ohio-state.edu (huangwei@mvapich.cse.ohio-state.edu) Date: Tue Aug 7 21:16:58 2007 Subject: [mvapich-commit] r1455 - in mvapich2/trunk/src/mpid/osu_ch3: channels/mrail/src/rdma src Message-ID: <200708080116.l781GYCM023315@mvapich.cse.ohio-state.edu> Author: huangwei Date: 2007-08-07 21:16:32 -0400 (Tue, 07 Aug 2007) New Revision: 1455 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c Log: Rendezvous resources associated with the request should be freed only if the R3 protocol is used. Otherwise, they are freed before the actual RDMA in the case that a datatype is used.
Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-06 19:38:10 UTC (rev 1454) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-08 01:16:32 UTC (rev 1455) @@ -539,6 +539,8 @@ VAPI_PROTOCOL_RPUT == rreq->mrail.protocol); } + rreq->mrail.protocol = VAPI_PROTOCOL_R3; + mpi_errno = MPIDI_CH3I_MRAIL_Fill_Request(rreq, buffer, skipsize, &nb); if (mpi_errno != MPI_SUCCESS) { mpi_errno = Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c 2007-08-06 19:38:10 UTC (rev 1454) +++ mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c 2007-08-08 01:16:32 UTC (rev 1455) @@ -73,10 +73,9 @@ } fn_exit: - if (TRUE == *complete && - (VAPI_PROTOCOL_RPUT == rreq->mrail.protocol || - VAPI_PROTOCOL_R3 == rreq->mrail.protocol)) + if (TRUE == *complete && VAPI_PROTOCOL_R3 == rreq->mrail.protocol) MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(rreq); + in_routine = FALSE; MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_HANDLE_RECV_REQ); return mpi_errno; From huangwei at mvapich.cse.ohio-state.edu Wed Aug 8 14:43:34 2007 From: huangwei at mvapich.cse.ohio-state.edu (huangwei@mvapich.cse.ohio-state.edu) Date: Wed Aug 8 14:43:56 2007 Subject: [mvapich-commit] r1456 - mvapich2/trunk/src/mpid/osu_ch3/src Message-ID: <200708081843.l78IhYSs025710@mvapich.cse.ohio-state.edu> Author: huangwei Date: 2007-08-08 14:43:32 -0400 (Wed, 08 Aug 2007) New Revision: 1456 Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_rma_sync.c Log: We should expect a tag message during Win_complete only if the remote side is not self. 
Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_rma_sync.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_rma_sync.c 2007-08-08 01:16:32 UTC (rev 1455) +++ mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_rma_sync.c 2007-08-08 18:43:32 UTC (rev 1456) @@ -288,7 +288,7 @@ #endif /* End of OSU-MPI2 */ DEBUG_PRINT - (stderr, "rankd %d comm_size %d, rmatarget procs[%d][%d][%d][%d]\n", + ("rankd %d comm_size %d, rmatarget procs[%d][%d][%d][%d]\n", win_ptr->my_id, comm_size, rma_target_proc[0], rma_target_proc[1], rma_target_proc[2], rma_target_proc[3]); @@ -1313,7 +1313,7 @@ } #endif if (dst != rank) { -#ifdef ONE_SIDED +#ifdef ONE_SIDED if (win_ptr->fall_back != 1 #ifdef _SMP_ && (!SMP_INIT || vc->smp.local_nodes == -1) @@ -1483,6 +1483,7 @@ MPIDI_CH3I_RDMA_complete_rma(win_ptr, start_grp_size, ranks_in_win_grp, 0); } + if (win_ptr->rma_ops_list == NULL && need_dummy == 0) { MPIU_Free(ranks_in_win_grp); MPIU_Free(ranks_in_start_grp); @@ -1514,15 +1515,15 @@ #if defined (_SMP_) && defined(ONE_SIDED) if (SMP_INIT) { MPIDI_Comm_get_vc(comm_ptr, src, &vc); - if ((src != rank && vc->smp.local_nodes != -1) - || win_ptr->fall_back == 1) + if (src != rank && + (vc->smp.local_nodes != -1 || win_ptr->fall_back == 1)) { mpi_errno = NMPI_Recv(NULL, 0, MPI_INT, src, 100, win_ptr->comm, MPI_STATUS_IGNORE); if (mpi_errno) { MPIU_ERR_POP(mpi_errno); } } - } else if (src != rank) + } else if (src != rank && win_ptr->fall_back == 1) #else if (src != rank) #endif From narravul at mvapich.cse.ohio-state.edu Wed Aug 8 14:59:36 2007 From: narravul at mvapich.cse.ohio-state.edu (narravul@mvapich.cse.ohio-state.edu) Date: Wed Aug 8 14:59:59 2007 Subject: [mvapich-commit] r1457 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2 Message-ID: <200708081859.l78Ixahr025747@mvapich.cse.ohio-state.edu> Author: narravul Date: 2007-08-08 14:59:34 -0400 (Wed, 08 Aug 2007) New Revision: 1457 Modified: 
mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c Log: Fix for finalize race condition for RDMA_CM seen in mpich2 coll tests. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c 2007-08-08 18:43:32 UTC (rev 1456) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c 2007-08-08 18:59:34 UTC (rev 1457) @@ -993,15 +993,17 @@ continue; MPIDI_PG_Get_vc(cached_pg, i, &vc); - for (rail_index = 0; rail_index < rdma_num_rails; rail_index++){ - if (vc->mrail.rails[rail_index].cm_ids != NULL) { - rdma_disconnect(vc->mrail.rails[rail_index].cm_ids); - rdma_destroy_qp(vc->mrail.rails[rail_index].cm_ids); + if (vc->ch.state == MPIDI_CH3I_VC_STATE_IDLE) { + for (rail_index = 0; rail_index < rdma_num_rails; rail_index++){ + if (vc->mrail.rails[rail_index].cm_ids != NULL) { + rdma_disconnect(vc->mrail.rails[rail_index].cm_ids); + rdma_destroy_qp(vc->mrail.rails[rail_index].cm_ids); + } + if (proc->has_one_sided){ + rdma_disconnect(vc->mrail.rails[rail_index].cm_ids_1sc); + rdma_destroy_qp(vc->mrail.rails[rail_index].cm_ids_1sc); + } } - if (proc->has_one_sided){ - rdma_disconnect(vc->mrail.rails[rail_index].cm_ids_1sc); - rdma_destroy_qp(vc->mrail.rails[rail_index].cm_ids_1sc); - } } } @@ -1035,11 +1037,13 @@ continue; MPIDI_PG_Get_vc(cached_pg, i, &vc); - for (rail_index = 0; rail_index < rdma_num_rails; rail_index++){ - if (vc->mrail.rails[rail_index].cm_ids != NULL) - rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); - if (proc->has_one_sided) - rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); + if (vc->ch.state == MPIDI_CH3I_VC_STATE_IDLE) { + for (rail_index = 0; rail_index < rdma_num_rails; rail_index++){ + if (vc->mrail.rails[rail_index].cm_ids != NULL) + rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); + if (proc->has_one_sided) + 
rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); + } } } From rowland at mvapich.cse.ohio-state.edu Sun Aug 12 01:53:22 2007 From: rowland at mvapich.cse.ohio-state.edu (rowland@mvapich.cse.ohio-state.edu) Date: Sun Aug 12 01:53:46 2007 Subject: [mvapich-commit] r1458 - mvapich2/trunk/test/mpi/pt2pt Message-ID: <200708120553.l7C5rMWq003248@mvapich.cse.ohio-state.edu> Author: rowland Date: 2007-08-12 01:53:20 -0400 (Sun, 12 Aug 2007) New Revision: 1458 Modified: mvapich2/trunk/test/mpi/pt2pt/testlist Log: Removing pingping test. Modified: mvapich2/trunk/test/mpi/pt2pt/testlist =================================================================== --- mvapich2/trunk/test/mpi/pt2pt/testlist 2007-08-08 18:59:34 UTC (rev 1457) +++ mvapich2/trunk/test/mpi/pt2pt/testlist 2007-08-12 05:53:20 UTC (rev 1458) @@ -3,7 +3,7 @@ sendrecv3 2 sendself 1 eagerdt 2 -pingping 2 +#pingping 2 bottom 2 bsend1 1 bsend2 1 From rowland at mvapich.cse.ohio-state.edu Mon Aug 13 16:30:00 2007 From: rowland at mvapich.cse.ohio-state.edu (rowland@mvapich.cse.ohio-state.edu) Date: Mon Aug 13 16:30:25 2007 Subject: [mvapich-commit] r1459 - mvapich2/trunk/src/mpid/osu_ch3/src Message-ID: <200708132030.l7DKU0fo024926@mvapich.cse.ohio-state.edu> Author: rowland Date: 2007-08-13 16:30:00 -0400 (Mon, 13 Aug 2007) New Revision: 1459 Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c Log: Fixed string termination issue in DEBUG_PRINT() call. 
Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c 2007-08-12 05:53:20 UTC (rev 1458) +++ mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c 2007-08-13 20:30:00 UTC (rev 1459) @@ -592,8 +592,8 @@ } else { req->mrail.protocol = VAPI_PROTOCOL_EAGER; } - DEBUG_PRINT("put_rndv_t size %d, buf0 size %d, buf1 size %d, - sender_req_id %p, data_sz %d\n", + DEBUG_PRINT("put_rndv_t size %d, buf0 size %d, buf1 size %d, " + "sender_req_id %p, data_sz %d\n", sizeof(MPIDI_CH3_Pkt_put_rndv_t), req->dev.iov[0].MPID_IOV_LEN, req->dev.iov[1].MPID_IOV_LEN, req->dev.sender_req_id, req->dev.recv_data_sz); From rowland at mvapich.cse.ohio-state.edu Mon Aug 13 16:34:53 2007 From: rowland at mvapich.cse.ohio-state.edu (rowland@mvapich.cse.ohio-state.edu) Date: Mon Aug 13 16:35:16 2007 Subject: [mvapich-commit] r1460 - in mvapich2/trunk/test/mpi: . cxx f77 f90 Message-ID: <200708132034.l7DKYrgb024943@mvapich.cse.ohio-state.edu> Author: rowland Date: 2007-08-13 16:34:53 -0400 (Mon, 13 Aug 2007) New Revision: 1460 Modified: mvapich2/trunk/test/mpi/cxx/testlist.in mvapich2/trunk/test/mpi/f77/testlist.in mvapich2/trunk/test/mpi/f90/testlist.in mvapich2/trunk/test/mpi/testlist.in Log: Removed I/O test cases for current testing run. 
Modified: mvapich2/trunk/test/mpi/cxx/testlist.in =================================================================== --- mvapich2/trunk/test/mpi/cxx/testlist.in 2007-08-13 20:30:00 UTC (rev 1459) +++ mvapich2/trunk/test/mpi/cxx/testlist.in 2007-08-13 20:34:53 UTC (rev 1460) @@ -4,6 +4,6 @@ init info datatype -@iodir@ +#@iodir@ #@spawndir@ @rmadir@ Modified: mvapich2/trunk/test/mpi/f77/testlist.in =================================================================== --- mvapich2/trunk/test/mpi/f77/testlist.in 2007-08-13 20:30:00 UTC (rev 1459) +++ mvapich2/trunk/test/mpi/f77/testlist.in 2007-08-13 20:34:53 UTC (rev 1460) @@ -4,7 +4,7 @@ pt2pt info #@spawndir@ -@iodir@ +#@iodir@ @rmadir@ init comm Modified: mvapich2/trunk/test/mpi/f90/testlist.in =================================================================== --- mvapich2/trunk/test/mpi/f90/testlist.in 2007-08-13 20:30:00 UTC (rev 1459) +++ mvapich2/trunk/test/mpi/f90/testlist.in 2007-08-13 20:34:53 UTC (rev 1460) @@ -4,7 +4,7 @@ ext info init -@iodir@ +#@iodir@ misc pt2pt datatype Modified: mvapich2/trunk/test/mpi/testlist.in =================================================================== --- mvapich2/trunk/test/mpi/testlist.in 2007-08-13 20:30:00 UTC (rev 1459) +++ mvapich2/trunk/test/mpi/testlist.in 2007-08-13 20:34:53 UTC (rev 1460) @@ -13,7 +13,7 @@ @rmadir@ #@spawndir@ topo -@iodir@ +#@iodir@ @f77dir@ @cxxdir@ @f90dir@ From narravul at mvapich.cse.ohio-state.edu Tue Aug 14 00:29:38 2007 From: narravul at mvapich.cse.ohio-state.edu (narravul@mvapich.cse.ohio-state.edu) Date: Tue Aug 14 00:30:03 2007 Subject: [mvapich-commit] r1461 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2 Message-ID: <200708140429.l7E4TbSH025877@mvapich.cse.ohio-state.edu> Author: narravul Date: 2007-08-14 00:29:36 -0400 (Tue, 14 Aug 2007) New Revision: 1461 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c Log: Fixing cleanup bug for rdma-cm finalize. 
Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c 2007-08-13 20:34:53 UTC (rev 1460) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_cm.c 2007-08-14 04:29:36 UTC (rev 1461) @@ -1042,7 +1042,7 @@ if (vc->mrail.rails[rail_index].cm_ids != NULL) rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); if (proc->has_one_sided) - rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids); + rdma_destroy_id(vc->mrail.rails[rail_index].cm_ids_1sc); } } } From rowland at mvapich.cse.ohio-state.edu Thu Aug 16 12:42:34 2007 From: rowland at mvapich.cse.ohio-state.edu (rowland@mvapich.cse.ohio-state.edu) Date: Thu Aug 16 12:42:59 2007 Subject: [mvapich-commit] r1462 - mvapich2/trunk/test/mpi/rma Message-ID: <200708161642.l7GGgYV1000904@mvapich.cse.ohio-state.edu> Author: rowland Date: 2007-08-16 12:42:33 -0400 (Thu, 16 Aug 2007) New Revision: 1462 Modified: mvapich2/trunk/test/mpi/rma/testlist Log: Removing putpscw1 test as it appears to be incorrect. 
Modified: mvapich2/trunk/test/mpi/rma/testlist =================================================================== --- mvapich2/trunk/test/mpi/rma/testlist 2007-08-14 04:29:36 UTC (rev 1461) +++ mvapich2/trunk/test/mpi/rma/testlist 2007-08-16 16:42:33 UTC (rev 1462) @@ -4,7 +4,7 @@ getfence1 4 accfence1 4 accfence2 4 -putpscw1 4 +#putpscw1 4 getgroup 4 transpose1 2 transpose2 2 From koop at mvapich.cse.ohio-state.edu Thu Aug 16 13:22:44 2007 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Thu Aug 16 13:23:12 2007 Subject: [mvapich-commit] r1463 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2 Message-ID: <200708161722.l7GHMiiN000977@mvapich.cse.ohio-state.edu> Author: koop Date: 2007-08-16 13:22:42 -0400 (Thu, 16 Aug 2007) New Revision: 1463 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c Log: * Fixing issue where v->coalesce flag was not getting reset properly. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c 2007-08-16 16:42:33 UTC (rev 1462) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/vbuf.c 2007-08-16 17:22:42 UTC (rev 1463) @@ -279,6 +279,7 @@ * a dozen other places, and probably miss one. 
*/ v->sreq = NULL; + v->coalesce = 0; if (MPIDI_CH3I_RDMA_Process.has_srq #ifdef CKPT From surs at mvapich.cse.ohio-state.edu Thu Aug 16 16:01:25 2007 From: surs at mvapich.cse.ohio-state.edu (surs@mvapich.cse.ohio-state.edu) Date: Thu Aug 16 16:01:51 2007 Subject: [mvapich-commit] r1464 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma Message-ID: <200708162001.l7GK1P7g001205@mvapich.cse.ohio-state.edu> Author: surs Date: 2007-08-16 16:01:24 -0400 (Thu, 16 Aug 2007) New Revision: 1464 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c Log: In case the sender has non-contiguous datatypes, the packing is deferred until the CTS arrives (according to the current datatype processing logic). If this is the case, fall back to RPUT. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c 2007-08-16 17:22:42 UTC (rev 1463) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c 2007-08-16 20:01:24 UTC (rev 1464) @@ -138,6 +138,12 @@ write */ MPIDI_CH3_Prepare_rndv(vc, sreq); MPIDI_CH3I_MRAIL_SET_PKT_RNDV(rndv_pkt, sreq); + if(1 == sreq->mrail.rndv_buf_alloc) { + MPIDI_CH3I_MRAIL_REVERT_RPUT(sreq); + if (VAPI_PROTOCOL_RGET == rndv_pkt->rndv.protocol) { + rndv_pkt->rndv.protocol = VAPI_PROTOCOL_RPUT; + } + } mpi_errno = MPIDI_CH3_iStartMsg(vc, rndv_pkt, sizeof(MPIDI_CH3_Pkt_rndv_req_to_send_t), From mamidala at mvapich.cse.ohio-state.edu Fri Aug 17 12:15:52 2007 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Fri Aug 17 12:16:18 2007 Subject: [mvapich-commit] r1465 - in mvapich2/trunk/src/mpi: comm init Message-ID: <200708171615.l7HGFqip003805@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2007-08-17 12:15:50 -0400 (Fri, 17 Aug 2007) New Revision: 1465 Modified:
mvapich2/trunk/src/mpi/comm/comm_create.c mvapich2/trunk/src/mpi/comm/comm_dup.c mvapich2/trunk/src/mpi/comm/comm_split.c mvapich2/trunk/src/mpi/comm/create_2level_comm.c mvapich2/trunk/src/mpi/init/init.c mvapich2/trunk/src/mpi/init/initthread.c Log: checking in thread specific functions, substituting the global flags used earlier Modified: mvapich2/trunk/src/mpi/comm/comm_create.c =================================================================== --- mvapich2/trunk/src/mpi/comm/comm_create.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/comm/comm_create.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -233,7 +233,7 @@ #ifdef _SMP_ int flag; if (enable_shmem_collectives){ - if (split_comm == 1){ + if (check_split_comm(pthread_self())){ if (*newcomm != MPI_COMM_NULL){ MPIR_Nest_incr(); MPI_Comm_test_inter(*newcomm, &flag); @@ -241,9 +241,9 @@ int my_id, size; MPI_Comm_rank(*newcomm, &my_id); MPI_Comm_size(*newcomm, &size); - split_comm = 0; + disable_split_comm(pthread_self()); create_2level_comm(*newcomm, size, my_id); - split_comm = 1; + enable_split_comm(pthread_self()); } MPIR_Nest_decr(); } Modified: mvapich2/trunk/src/mpi/comm/comm_dup.c =================================================================== --- mvapich2/trunk/src/mpi/comm/comm_dup.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/comm/comm_dup.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -178,27 +178,27 @@ } *newcomm = newcomm_ptr->handle; + #ifdef _SMP_ int flag; if (enable_shmem_collectives){ - if (split_comm == 1){ + MPIR_Nest_incr(); + if (check_split_comm(pthread_self())){ if (*newcomm != MPI_COMM_NULL){ - MPIR_Nest_incr(); MPI_Comm_test_inter(*newcomm, &flag); if (flag == 0){ int my_id, size; MPI_Comm_rank(*newcomm, &my_id); MPI_Comm_size(*newcomm, &size); - split_comm = 0; + disable_split_comm(pthread_self()); create_2level_comm(*newcomm, size, my_id); - split_comm = 1; + enable_split_comm(pthread_self()); } - MPIR_Nest_decr(); } } + MPIR_Nest_decr(); } #endif - /* ... 
end of body of routine ... */ Modified: mvapich2/trunk/src/mpi/comm/comm_split.c =================================================================== --- mvapich2/trunk/src/mpi/comm/comm_split.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/comm/comm_split.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -269,7 +269,7 @@ #ifdef _SMP_ int flag; if (enable_shmem_collectives){ - if (split_comm == 1){ + if (check_split_comm(pthread_self())){ if (*newcomm != MPI_COMM_NULL){ MPIR_Nest_incr(); MPI_Comm_test_inter(*newcomm, &flag); @@ -277,9 +277,9 @@ int my_id, size; MPI_Comm_rank(*newcomm, &my_id); MPI_Comm_size(*newcomm, &size); - split_comm = 0; + disable_split_comm(pthread_self()); create_2level_comm(*newcomm, size, my_id); - split_comm = 1; + enable_split_comm(pthread_self()); } MPIR_Nest_decr(); } Modified: mvapich2/trunk/src/mpi/comm/create_2level_comm.c =================================================================== --- mvapich2/trunk/src/mpi/comm/create_2level_comm.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/comm/create_2level_comm.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -25,8 +25,12 @@ int shmem_comm_count = 0; extern shmem_coll_region *shmem_coll; static pthread_mutex_t shmem_coll_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t comm_lock = PTHREAD_MUTEX_INITIALIZER; extern int shmem_coll_blocks; +#define MAX_NUM_THREADS 1024 +pthread_t thread_reg[MAX_NUM_THREADS]; + void clear_2level_comm (MPID_Comm* comm_ptr) { comm_ptr->shmem_coll_ok = 0; @@ -154,7 +158,6 @@ else{ input_flag = 0; } - comm_ptr->shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/ MPI_Allreduce(&input_flag, &output_flag, 1, MPI_INT, MPI_LAND, comm); @@ -169,14 +172,75 @@ MPI_Group_free(&comm_group); } - ++comm_count; MPIR_Nest_decr(); } +int init_thread_reg(void){ + int j; + for ( j=0; j < MAX_NUM_THREADS; j++ ){ + thread_reg[j] = -1; + } +} +int check_split_comm(pthread_t my_id){ + int j, value; + + pthread_mutex_lock(&comm_lock); + for ( j=0; 
j < MAX_NUM_THREADS; j++ ){ + if (pthread_equal(thread_reg[j], my_id)){ + value = 0; + pthread_mutex_unlock(&comm_lock); + return value; + } + } + value = 1; + pthread_mutex_unlock(&comm_lock); + + return value; +} + +int disable_split_comm(pthread_t my_id){ + int j,found = 0; + + pthread_mutex_lock(&comm_lock); + for ( j=0; j < MAX_NUM_THREADS; j++ ){ + if (thread_reg[j] == -1){ + thread_reg[j] = my_id; + found = 1; + break; + } + } + pthread_mutex_unlock(&comm_lock); + + if (found == 0){ + printf("Error:max number of threads reached\n"); + exit(0); + } +} + + +int enable_split_comm(pthread_t my_id){ + int j,found = 0; + + pthread_mutex_lock(&comm_lock); + for ( j=0; j < MAX_NUM_THREADS; j++ ){ + if (pthread_equal(thread_reg[j], my_id)){ + thread_reg[j] = -1; + found = 1; + break; + } + } + pthread_mutex_unlock(&comm_lock); + + if (found == 0){ + printf("Error: Could not locate thread id\n"); + exit(0); + } +} + int check_comm_registry(MPI_Comm comm) { MPID_Comm* comm_ptr; Modified: mvapich2/trunk/src/mpi/init/init.c =================================================================== --- mvapich2/trunk/src/mpi/init/init.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/init/init.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -121,14 +121,14 @@ #ifdef _SMP_ if (enable_shmem_collectives){ - if (split_comm == 1){ + if (check_split_comm(pthread_self())){ MPIR_Nest_incr(); int my_id, size; MPI_Comm_rank(MPI_COMM_WORLD, &my_id); MPI_Comm_size(MPI_COMM_WORLD, &size); - split_comm = 0; + disable_split_comm(pthread_self()); create_2level_comm(MPI_COMM_WORLD, size, my_id); - split_comm = 1; + enable_split_comm(pthread_self()); MPIR_Nest_decr(); } } Modified: mvapich2/trunk/src/mpi/init/initthread.c =================================================================== --- mvapich2/trunk/src/mpi/init/initthread.c 2007-08-16 20:01:24 UTC (rev 1464) +++ mvapich2/trunk/src/mpi/init/initthread.c 2007-08-17 16:15:50 UTC (rev 1465) @@ -369,6 +369,7 @@ extern int 
shmem_coll_blocks; extern int shmem_coll_max_msg_size; void MV2_Read_env_vars(void); +void init_thread_reg(); #endif int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ) { @@ -415,14 +416,14 @@ #ifdef _SMP_ if (enable_shmem_collectives){ - if (split_comm == 1){ + if (check_split_comm(pthread_self())){ MPIR_Nest_incr(); int my_id, size; MPI_Comm_rank(MPI_COMM_WORLD, &my_id); MPI_Comm_size(MPI_COMM_WORLD, &size); - split_comm = 0; + disable_split_comm(pthread_self()); create_2level_comm(MPI_COMM_WORLD, size, my_id); - split_comm = 1; + enable_split_comm(pthread_self()); MPIR_Nest_decr(); } } @@ -491,5 +492,7 @@ flag = (int)atoi(value); if (flag > 0) enable_shmem_collectives = 0; } + + init_thread_reg(); } #endif From santhana at mvapich.cse.ohio-state.edu Sat Aug 18 15:20:10 2007 From: santhana at mvapich.cse.ohio-state.edu (santhana@mvapich.cse.ohio-state.edu) Date: Sat Aug 18 15:20:35 2007 Subject: [mvapich-commit] r1466 - mvapich2/trunk/test/mpi/rma Message-ID: <200708181920.l7IJKAih006994@mvapich.cse.ohio-state.edu> Author: santhana Date: 2007-08-18 15:20:08 -0400 (Sat, 18 Aug 2007) New Revision: 1466 Modified: mvapich2/trunk/test/mpi/rma/accfence1.c mvapich2/trunk/test/mpi/rma/getfence1.c mvapich2/trunk/test/mpi/rma/nullpscw.c mvapich2/trunk/test/mpi/rma/putfence1.c mvapich2/trunk/test/mpi/rma/putpscw1.c Log: making the mpich2 1.0.6-RC fixes to the rma test list Modified: mvapich2/trunk/test/mpi/rma/accfence1.c =================================================================== --- mvapich2/trunk/test/mpi/rma/accfence1.c 2007-08-17 16:15:50 UTC (rev 1465) +++ mvapich2/trunk/test/mpi/rma/accfence1.c 2007-08-18 19:20:08 UTC (rev 1466) @@ -84,9 +84,9 @@ else { MPI_Win_fence( 0, win ); } + MPI_Win_free( &win ); MTestFreeDatatype( &sendtype ); MTestFreeDatatype( &recvtype ); - MPI_Win_free( &win ); } } MTestFreeComm(&comm); Modified: mvapich2/trunk/test/mpi/rma/getfence1.c =================================================================== --- 
mvapich2/trunk/test/mpi/rma/getfence1.c 2007-08-17 16:15:50 UTC (rev 1465) +++ mvapich2/trunk/test/mpi/rma/getfence1.c 2007-08-18 19:20:08 UTC (rev 1466) @@ -80,9 +80,9 @@ else { MPI_Win_fence( 0, win ); } + MPI_Win_free( &win ); MTestFreeDatatype( &recvtype ); MTestFreeDatatype( &sendtype ); - MPI_Win_free( &win ); } } MTestFreeComm(&comm); Modified: mvapich2/trunk/test/mpi/rma/nullpscw.c =================================================================== --- mvapich2/trunk/test/mpi/rma/nullpscw.c 2007-08-17 16:15:50 UTC (rev 1465) +++ mvapich2/trunk/test/mpi/rma/nullpscw.c 2007-08-18 19:20:08 UTC (rev 1466) @@ -24,7 +24,7 @@ MPI_Win_complete(win); MPI_Win_wait(win); - + MPI_Group_free( &group ); MPI_Win_free(&win); MTest_Finalize(errs); MPI_Finalize(); Modified: mvapich2/trunk/test/mpi/rma/putfence1.c =================================================================== --- mvapich2/trunk/test/mpi/rma/putfence1.c 2007-08-17 16:15:50 UTC (rev 1465) +++ mvapich2/trunk/test/mpi/rma/putfence1.c 2007-08-18 19:20:08 UTC (rev 1466) @@ -91,9 +91,9 @@ else { MPI_Win_fence( 0, win ); } + MPI_Win_free( &win ); MTestFreeDatatype( &sendtype ); MTestFreeDatatype( &recvtype ); - MPI_Win_free( &win ); } } MTestFreeComm(&comm); Modified: mvapich2/trunk/test/mpi/rma/putpscw1.c =================================================================== --- mvapich2/trunk/test/mpi/rma/putpscw1.c 2007-08-17 16:15:50 UTC (rev 1465) +++ mvapich2/trunk/test/mpi/rma/putpscw1.c 2007-08-18 19:20:08 UTC (rev 1466) @@ -92,10 +92,10 @@ MPI routines */ ; } + MPI_Win_free( &win ); MTestFreeDatatype( &sendtype ); MTestFreeDatatype( &recvtype ); MPI_Group_free( &wingroup ); - MPI_Win_free( &win ); } } MTestFreeComm( &comm ); From rowland at mvapich.cse.ohio-state.edu Sat Aug 18 17:28:46 2007 From: rowland at mvapich.cse.ohio-state.edu (rowland@mvapich.cse.ohio-state.edu) Date: Sat Aug 18 17:29:11 2007 Subject: [mvapich-commit] r1467 - mvapich2/trunk/test/mpi/rma Message-ID: 
<200708182128.l7ILSkWb007165@mvapich.cse.ohio-state.edu> Author: rowland Date: 2007-08-18 17:28:44 -0400 (Sat, 18 Aug 2007) New Revision: 1467 Modified: mvapich2/trunk/test/mpi/rma/testlist Log: Adding updated putpscw1 back to the testlist file. Modified: mvapich2/trunk/test/mpi/rma/testlist =================================================================== --- mvapich2/trunk/test/mpi/rma/testlist 2007-08-18 19:20:08 UTC (rev 1466) +++ mvapich2/trunk/test/mpi/rma/testlist 2007-08-18 21:28:44 UTC (rev 1467) @@ -4,7 +4,7 @@ getfence1 4 accfence1 4 accfence2 4 -#putpscw1 4 +putpscw1 4 getgroup 4 transpose1 2 transpose2 2 From gaoq at mvapich.cse.ohio-state.edu Mon Aug 20 10:01:47 2007 From: gaoq at mvapich.cse.ohio-state.edu (gaoq@mvapich.cse.ohio-state.edu) Date: Mon Aug 20 10:02:11 2007 Subject: [mvapich-commit] r1468 - mvapich2/trunk/src/pm/mpd Message-ID: <200708201401.l7KE1lSP000653@mvapich.cse.ohio-state.edu> Author: gaoq Date: 2007-08-20 10:01:45 -0400 (Mon, 20 Aug 2007) New Revision: 1468 Modified: mvapich2/trunk/src/pm/mpd/mv2_checkpoint Log: Specify "process" option to cr_checkpoint Modified: mvapich2/trunk/src/pm/mpd/mv2_checkpoint =================================================================== --- mvapich2/trunk/src/pm/mpd/mv2_checkpoint 2007-08-18 21:28:44 UTC (rev 1467) +++ mvapich2/trunk/src/pm/mpd/mv2_checkpoint 2007-08-20 14:01:45 UTC (rev 1468) @@ -36,7 +36,7 @@ done echo "Checkpointing PID $PID" -cr_checkpoint $PID +cr_checkpoint -p $PID if [ $? 
= 0 ]; then echo "Checkpoint file: context.$PID" From narravul at mvapich.cse.ohio-state.edu Wed Aug 22 02:36:52 2007 From: narravul at mvapich.cse.ohio-state.edu (narravul@mvapich.cse.ohio-state.edu) Date: Wed Aug 22 02:37:18 2007 Subject: [mvapich-commit] r1469 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2 Message-ID: <200708220636.l7M6ap33005327@mvapich.cse.ohio-state.edu> Author: narravul Date: 2007-08-22 02:36:50 -0400 (Wed, 22 Aug 2007) New Revision: 1469 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c Log: Fix for the alltoallwf90 failure where backlog.len was not zero after init. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c 2007-08-20 14:01:45 UTC (rev 1468) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c 2007-08-22 06:36:50 UTC (rev 1469) @@ -681,6 +681,8 @@ "**fail", "**fail %s", "Failed to allocate resources for " "credits array"); } + memset(vc->mrail.srp.credits, 0, + (sizeof *vc->mrail.srp.credits * vc->mrail.num_rails)); if (i == pg_rank) continue; From huangwei at mvapich.cse.ohio-state.edu Wed Aug 22 15:24:24 2007 From: huangwei at mvapich.cse.ohio-state.edu (huangwei@mvapich.cse.ohio-state.edu) Date: Wed Aug 22 15:24:51 2007 Subject: [mvapich-commit] r1470 - in mvapich2/trunk/src/mpid/osu_ch3: channels/mrail/src/gen2 channels/mrail/src/rdma src Message-ID: <200708221924.l7MJOO5t006987@mvapich.cse.ohio-state.edu> Author: huangwei Date: 2007-08-22 15:24:23 -0400 (Wed, 22 Aug 2007) New Revision: 1470 Added: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/crc32h.c Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/Makefile.in mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_recv.c 
mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c Log: Forcing order for CTS packets with RMA operations. Add CRC check for debugging purpose. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/Makefile.in =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/Makefile.in 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/Makefile.in 2007-08-22 19:24:23 UTC (rev 1470) @@ -58,18 +58,18 @@ ../../../../../../../lib/lib${MPILIBNAME}.a: ibv_send.o ibv_recv.o rdma_iba_init.o rdma_iba_priv.o \ dreg.o ibv_param.o vbuf.o ibv_channel_manager.o ibv_rma.o rdma_iba_1sc.o \ -ibv_rndv.o ibv_priv.o avl.o mem_hooks.o rdma_cm.o cm.o cr.o +ibv_rndv.o ibv_priv.o avl.o mem_hooks.o rdma_cm.o cm.o cr.o crc32h.o ${AR} cr ../../../../../../../lib/lib${MPILIBNAME}.a $? ${RANLIB} ../../../../../../../lib/lib${MPILIBNAME}.a ../../../../../../../lib/lib${MPILIBNAME}.la: ibv_send.lo ibv_recv.lo rdma_iba_init.lo rdma_iba_priv.lo \ dreg.lo ibv_param.lo vbuf.lo ibv_channel_manager.lo ibv_rma.lo rdma_iba_1sc.lo \ -ibv_rndv.lo ibv_priv.lo avl.lo mem_hooks.lo cm.lo rdma_cm.lo cr.lo +ibv_rndv.lo ibv_priv.lo avl.lo mem_hooks.lo cm.lo rdma_cm.lo cr.lo crc32h.lo ${AR} cr ../../../../../../../lib/lib${MPILIBNAME}.la $? 
SOURCES = ibv_send.c ibv_recv.c rdma_iba_init.c rdma_iba_priv.c dreg.c ibv_param.c \ vbuf.c ibv_channel_manager.c ibv_rma.c rdma_iba_1sc.c ibv_rndv.c ibv_priv.c avl.c \ -mem_hooks.c cm.c rdma_cm.c cr.c +mem_hooks.c cm.c rdma_cm.c cr.c crc32h.c ptmalloc2: malloc.o ${AR} cr ../../../../../../../lib/lib${MPILIBNAME}.a malloc.o Added: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/crc32h.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/crc32h.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/crc32h.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -0,0 +1,85 @@ +/* crc32h.c -- package to compute 32-bit CRC one byte at a time using */ +/* the high-bit first (Big-Endian) bit ordering convention */ +/* */ +/* Synopsis: */ +/* gen_crc_table() -- generates a 256-word table containing all CRC */ +/* remainders for every possible 8-bit byte. It */ +/* must be executed (once) before any CRC updates. */ +/* */ +/* unsigned update_crc(crc_accum, data_blk_ptr, data_blk_size) */ +/* unsigned crc_accum; char *data_blk_ptr; int data_blk_size; */ +/* Returns the updated value of the CRC accumulator after */ +/* processing each byte in the addressed block of data. */ +/* */ +/* It is assumed that an unsigned long is at least 32 bits wide and */ +/* that the predefined type char occupies one 8-bit byte of storage. */ +/* */ +/* The generator polynomial used for this version of the package is */ +/* x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ +/* as specified in the Autodin/Ethernet/ADCCP protocol standards. */ +/* Other degree 32 polynomials may be substituted by re-defining the */ +/* symbol POLYNOMIAL below. Lower degree polynomials must first be */ +/* multiplied by an appropriate power of x. 
The representation used */ +/* is that the coefficient of x^0 is stored in the LSB of the 32-bit */ +/* word and the coefficient of x^31 is stored in the most significant */ +/* bit. The CRC is to be appended to the data most significant byte */ +/* first. For those protocols in which bytes are transmitted MSB */ +/* first and in the same order as they are encountered in the block */ +/* this convention results in the CRC remainder being transmitted with */ +/* the coefficient of x^31 first and with that of x^0 last (just as */ +/* would be done by a hardware shift register mechanization). */ +/* */ +/* The table lookup technique was adapted from the algorithm described */ +/* by Avram Perez, Byte-wise CRC Calculations, IEEE Micro 3, 40 (1983).*/ +#ifdef CRC_CHECK + +#include <string.h> + +#define POLYNOMIAL 0x04c11db7L + +static unsigned long crc_table[256]; + + +/* generate the table of CRC remainders for all possible bytes */ +void gen_crc_table() +{ + register int i, j; + register unsigned long crc_accum; + + memset(&crc_table, 0, sizeof(unsigned long) * 256); + + for ( i = 0; i < 256; i++ ) { + + crc_accum = ( (unsigned long) i << 24 ); + + for ( j = 0; j < 8; j++ ) { + + if ( crc_accum & 0x80000000L ) + crc_accum = ( crc_accum << 1 ) ^ POLYNOMIAL; + else + crc_accum = ( crc_accum << 1 ); + } + + crc_table[i] = crc_accum; + } + + return; +} + + +/* update the CRC on the data block one byte at a time */ +unsigned long update_crc(unsigned long crc_accum, char *data_blk_ptr, + int data_blk_size) +{ + register int i, j; + + for ( j = 0; j < data_blk_size; j++ ) { + + i = ( (int) ( crc_accum >> 24) ^ *data_blk_ptr++ ) & 0xff; + crc_accum = ( crc_accum << 8 ) ^ crc_table[i]; + + } + return crc_accum; +} + +#endif Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c 2007-08-22 06:36:50 UTC (rev 
1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -196,6 +196,11 @@ { return ((MPIDI_CH3_Pkt_close_t *)(buf->pheader))->seqnum; } + case MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND: + { + return ((MPIDI_CH3_Pkt_rndv_clr_to_send_t *) + (buf->pheader))->seqnum; + } default: return PKT_NO_SEQ_NUM; } Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_recv.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_recv.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_recv.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -46,6 +46,9 @@ { void *vstart; MPIDI_CH3I_MRAILI_Pkt_comm_header *header; +#ifdef CRC_CHECK + unsigned long crc; +#endif DEBUG_PRINT("[parse header] vbuf address %p\n", v); vstart = v->pheader; @@ -54,7 +57,17 @@ /* set it to the header size by default */ *header_size = MPIDI_CH3_Pkt_size_index[header->type]; - +#ifdef CRC_CHECK + crc = update_crc(1, (void *)((uintptr_t)header+sizeof *header), + v->content_size - sizeof *header); + if (crc != header->mrail.crc) { + int rank; PMI_Get_rank(&rank); + fprintf(stderr, "CRC mismatch, get %lx, should be %lx " + "type %d, ocntent size %d\n", + crc, header->mrail.crc, header->type, v->content_size); + assert(0); + } +#endif switch (header->type) { #ifdef USE_HEADER_CACHING case (MPIDI_CH3_PKT_FAST_EAGER_SEND): Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_send.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -390,7 +390,10 @@ vc->mrail.rfp.RDMA_remote_buf_rkey[vc->mrail.rails[rail].hca_index]); FLUSH_RAIL(vc, rail); - +#ifdef CRC_CHECK + p->mrail.crc = 
update_crc(1, (void *)((uintptr_t)p+sizeof *p), + *v->head_flag - sizeof *p); +#endif if (!vc->mrail.rails[rail].send_wqes_avail) { DEBUG_PRINT("[send: rdma_send] Warning! no send wqe available\n"); MRAILI_Ext_sendq_enqueue(vc, rail, v); @@ -541,7 +544,10 @@ || p->type == MPIDI_CH3_PKT_NOOP) { PACKET_SET_CREDIT(p, vc, rail); - +#ifdef CRC_CHECK + p->mrail.crc = update_crc(1, (void *)((uintptr_t)p+sizeof *p), + v->desc.sg_entry.length - sizeof *p ); +#endif if (p->type != MPIDI_CH3_PKT_NOOP) vc->mrail.srp.credits[rail].remote_credit--; @@ -747,7 +753,10 @@ (v->buffer + v->content_size - *num_bytes_ptr); PACKET_SET_CREDIT(p, vc, v->rail); - +#ifdef CRC_CHECK + p->mrail.crc = update_crc(1, (void *)((uintptr_t)p+sizeof *p), + v->desc.sg_entry.length - sizeof *p); +#endif v->vc = (void *) vc; p->mrail.src_rank = MPIDI_Process.my_pg_rank; p->mrail.rail = v->rail; @@ -853,6 +862,10 @@ p = (MPIDI_CH3I_MRAILI_Pkt_comm_header *) v->pheader; PACKET_SET_CREDIT(p, vc, rail); +#ifdef CRC_CHECK + p->mrail.crc = update_crc(1, (void *)((uintptr_t)p+sizeof *p), + v->desc.sg_entry.length - sizeof *p); +#endif vc->mrail.srp.credits[rail].remote_credit--; if (MPIDI_CH3I_RDMA_Process.has_srq) { Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h 2007-08-22 19:24:23 UTC (rev 1470) @@ -27,6 +27,12 @@ have been included. */ +#ifdef CRC_CHECK +void gen_crc_table(); +unsigned long update_crc(unsigned long crc_accum, char *data_blk_ptr, + int data_blk_size); +#endif + #ifdef ONE_SIDED /* structure MPIDI_CH3I_RDMA_put_get_list is the queue pool to record every * issued signaled RDMA write and RDMA read operation. 
The address of Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h 2007-08-22 19:24:23 UTC (rev 1470) @@ -36,9 +36,12 @@ uint8_t vbuf_credit; uint8_t remote_credit; uint8_t rdma_credit; - int smp_index; + int smp_index; uint32_t src_rank; uint8_t rail; +#ifdef CRC_CHECK + unsigned long crc; +#endif } MPIDI_CH3I_MRAILI_Iba_pkt_t; #define MPIDI_CH3I_MRAILI_IBA_PKT_DECL \ Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_iba_priv.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -331,7 +331,9 @@ struct ibv_device **dev_list; int i, j; int mpi_errno = MPI_SUCCESS; - +#ifdef CRC_CHECK + gen_crc_table(); +#endif dev_list = ibv_get_device_list(NULL); for (i = 0; i < rdma_num_hcas; i ++) { Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -361,7 +361,6 @@ break; } } else { - assert(0); MPIDI_CH3I_SMP_SendQ_enqueue_head(vc, sreq); vc->smp.send_active = sreq; sreq->mrail.nearly_complete = 1; @@ -814,7 +813,6 @@ MPIDI_CH3I_MRAILI_RREQ_RNDV_FINISH(rreq); mpi_errno = MPIDI_CH3U_Handle_recv_req(vc, rreq, &complete); - if (mpi_errno != MPI_SUCCESS) { mpi_errno = MPIR_Err_create_code(mpi_errno, 
Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_pkt.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -518,6 +518,7 @@ MPID_Request *cts_req; MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send; + MPID_Seqnum_t seqnum; req->dev.sender_req_id = rts_pkt->sender_req_id; req->dev.recv_data_sz = rts_pkt->data_sz; @@ -527,7 +528,8 @@ MPIDI_CH3U_Post_data_receive_found(req); MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND); - MPIDI_Pkt_set_seqnum(cts_pkt, -1); + MPIDI_VC_FAI_send_seqnum(vc, seqnum); + MPIDI_Pkt_set_seqnum(cts_pkt, seqnum); cts_pkt->sender_req_id = rts_pkt->sender_req_id; cts_pkt->receiver_req_id = req->handle; @@ -893,6 +895,7 @@ MPID_Request *cts_req; MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send; + MPID_Seqnum_t seqnum; req->dev.sender_req_id = rts_pkt->sender_req_id; @@ -902,7 +905,8 @@ MPIDI_CH3U_Post_data_receive_found(req); MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND); - MPIDI_Pkt_set_seqnum(cts_pkt, -1); + MPIDI_VC_FAI_send_seqnum(vc, seqnum); + MPIDI_Pkt_set_seqnum(cts_pkt, seqnum); cts_pkt->sender_req_id = rts_pkt->sender_req_id; cts_pkt->receiver_req_id = req->handle; Modified: mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c 2007-08-22 06:36:50 UTC (rev 1469) +++ mvapich2/trunk/src/mpid/osu_ch3/src/ch3u_handle_recv_req.c 2007-08-22 19:24:23 UTC (rev 1470) @@ -229,10 +229,12 @@ MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send; + MPID_Seqnum_t seqnum; MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND); - 
MPIDI_Pkt_set_seqnum(cts_pkt, -1); + MPIDI_VC_FAI_send_seqnum(vc, seqnum); + MPIDI_Pkt_set_seqnum(cts_pkt, seqnum); cts_pkt->sender_req_id = rreq->dev.sender_req_id; cts_pkt->receiver_req_id = rreq->handle; @@ -341,10 +343,12 @@ MPIDI_CH3_Pkt_t upkt; MPIDI_CH3_Pkt_rndv_clr_to_send_t *cts_pkt = &upkt.rndv_clr_to_send; + MPID_Seqnum_t seqnum; MPIDI_Pkt_init(cts_pkt, MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND); - MPIDI_Pkt_set_seqnum(cts_pkt, -1); + MPIDI_VC_FAI_send_seqnum(vc, seqnum); + MPIDI_Pkt_set_seqnum(cts_pkt, seqnum); cts_pkt->sender_req_id = rreq->dev.sender_req_id; cts_pkt->receiver_req_id = rreq->handle; From mamidala at mvapich.cse.ohio-state.edu Thu Aug 23 00:39:23 2007 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Thu Aug 23 00:39:49 2007 Subject: [mvapich-commit] r1471 - in mvapich2/trunk/src: include mpi/coll mpi/init Message-ID: <200708230439.l7N4dNM2008036@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2007-08-23 00:39:21 -0400 (Thu, 23 Aug 2007) New Revision: 1471 Modified: mvapich2/trunk/src/include/mpiimpl.h mvapich2/trunk/src/mpi/coll/allreduce.c mvapich2/trunk/src/mpi/coll/reduce.c mvapich2/trunk/src/mpi/init/initthread.c Log: checking in run-time variables, parameters Modified: mvapich2/trunk/src/include/mpiimpl.h =================================================================== --- mvapich2/trunk/src/include/mpiimpl.h 2007-08-22 19:24:23 UTC (rev 1470) +++ mvapich2/trunk/src/include/mpiimpl.h 2007-08-23 04:39:21 UTC (rev 1471) @@ -3354,6 +3354,22 @@ #define MPIR_EXSCAN_TAG 24 #define MPIR_ALLTOALLW_TAG 25 +#ifdef _SMP_ +#define SHMEM_ALLREDUCE_THRESHOLD (1<<15) +#define SHMEM_REDUCE_THRESHOLD (1<<10) +#endif + + +struct coll_runtime { + int allreduce_short_msg; + int reduce_short_msg; +#ifdef _SMP_ + int shmem_allreduce_msg; + int shmem_reduce_msg; +#endif +}; + + /* These functions are used in the implementation of collective operations. They are wrappers around MPID send/recv functions. 
They do sends/receives by setting the context offset to Modified: mvapich2/trunk/src/mpi/coll/allreduce.c =================================================================== --- mvapich2/trunk/src/mpi/coll/allreduce.c 2007-08-22 19:24:23 UTC (rev 1470) +++ mvapich2/trunk/src/mpi/coll/allreduce.c 2007-08-23 04:39:21 UTC (rev 1471) @@ -49,6 +49,7 @@ MPIR_LXOR_check_dtype, MPIR_BXOR_check_dtype, MPIR_MINLOC_check_dtype, MPIR_MAXLOC_check_dtype, }; +extern struct coll_runtime coll_param; /* This is the default implementation of allreduce. The algorithm is: @@ -282,7 +283,7 @@ using recursive doubling in that case.) */ if (newrank != -1) { - if ((count*type_size <= MPIR_ALLREDUCE_SHORT_MSG) || + if ((count*type_size <= coll_param.allreduce_short_msg) || (HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN) || (count < pof2)) { /* use recursive doubling */ mask = 0x1; @@ -608,7 +609,6 @@ #ifdef _SMP_ extern int enable_shmem_collectives; extern int disable_shmem_allreduce; -#define SHMEM_COLL_ALLREDUCE_THRESHOLD (1<<15) #endif int MPI_Allreduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm ) @@ -755,7 +755,7 @@ } MPIR_Nest_decr(); } - if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_ALLREDUCE_THRESHOLD)&& + if ((comm_ptr->shmem_coll_ok == 1)&&(stride < coll_param.shmem_allreduce_msg)&& (disable_shmem_allreduce == 0) &&(is_commutative) &&(enable_shmem_collectives) &&(check_comm_registry(comm))){ MPIR_Nest_incr(); my_rank = comm_ptr->rank; @@ -770,7 +770,6 @@ MPID_Comm_get_ptr(leader_comm, leader_commptr); MPIR_Nest_decr(); - if (local_rank == 0){ global_rank = leader_commptr->rank; if (sendbuf != MPI_IN_PLACE){ Modified: mvapich2/trunk/src/mpi/coll/reduce.c =================================================================== --- mvapich2/trunk/src/mpi/coll/reduce.c 2007-08-22 19:24:23 UTC (rev 1470) +++ mvapich2/trunk/src/mpi/coll/reduce.c 2007-08-23 04:39:21 UTC (rev 1471) @@ -35,6 +35,8 @@ #undef MPI_Reduce #define MPI_Reduce 
PMPI_Reduce +extern struct coll_runtime coll_param; + /* This is the default implementation of reduce. The algorithm is: Algorithm: MPI_Reduce @@ -188,7 +190,7 @@ /* check if multiple threads are calling this collective function */ MPIDU_ERR_CHECK_MULTIPLE_THREADS_ENTER( comm_ptr ); - if ((count*type_size > MPIR_REDUCE_SHORT_MSG) && + if ((count*type_size > coll_param.reduce_short_msg) && (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) && (count >= pof2)) { /* do a reduce-scatter followed by gather to root. */ @@ -715,7 +717,6 @@ #ifdef _SMP_ extern int enable_shmem_collectives; extern int disable_shmem_reduce; -#define SHMEM_COLL_REDUCE_THRESHOLD (1<<10) #endif int MPI_Reduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) @@ -897,7 +898,7 @@ MPIR_Nest_decr(); } - if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_REDUCE_THRESHOLD)&& + if ((comm_ptr->shmem_coll_ok == 1)&&(stride < coll_param.shmem_reduce_msg)&& (disable_shmem_reduce == 0) &&(is_commutative==1) &&(enable_shmem_collectives)&&(check_comm_registry(comm))){ MPIR_Nest_incr(); my_rank = comm_ptr->rank; Modified: mvapich2/trunk/src/mpi/init/initthread.c =================================================================== --- mvapich2/trunk/src/mpi/init/initthread.c 2007-08-22 19:24:23 UTC (rev 1470) +++ mvapich2/trunk/src/mpi/init/initthread.c 2007-08-23 04:39:21 UTC (rev 1471) @@ -371,6 +371,15 @@ void MV2_Read_env_vars(void); void init_thread_reg(); #endif + +struct coll_runtime coll_param = { MPIR_ALLREDUCE_SHORT_MSG, + MPIR_REDUCE_SHORT_MSG, +#ifdef _SMP_ + SHMEM_ALLREDUCE_THRESHOLD, + SHMEM_REDUCE_THRESHOLD +#endif +}; + int MPI_Init_thread( int *argc, char ***argv, int required, int *provided ) { static const char FCNAME[] = "MPI_Init_thread"; @@ -493,6 +502,22 @@ if (flag > 0) enable_shmem_collectives = 0; } + if ((value = getenv("MV2_ALLREDUCE_SHORT_MSG")) != NULL){ + flag = (int)atoi(value); + if (flag >= 0) coll_param.allreduce_short_msg = 
flag; + } + if ((value = getenv("MV2_REDUCE_SHORT_MSG")) != NULL){ + flag = (int)atoi(value); + if (flag >= 0) coll_param.reduce_short_msg = flag; + } + if ((value = getenv("MV2_SHMEM_ALLREDUCE_MSG")) != NULL){ + flag = (int)atoi(value); + if (flag >= 0) coll_param.shmem_allreduce_msg = flag; + } + if ((value = getenv("MV2_SHMEM_REDUCE_MSG")) != NULL){ + flag = (int)atoi(value); + if (flag >= 0) coll_param.shmem_reduce_msg = flag; + } init_thread_reg(); } #endif From mamidala at mvapich.cse.ohio-state.edu Sat Aug 25 02:04:54 2007 From: mamidala at mvapich.cse.ohio-state.edu (mamidala@mvapich.cse.ohio-state.edu) Date: Sat Aug 25 02:05:19 2007 Subject: [mvapich-commit] r1472 - in mvapich2/trunk/src: mpi/comm mpid/osu_ch3/channels/mrail/src/rdma Message-ID: <200708250604.l7P64sRO013900@mvapich.cse.ohio-state.edu> Author: mamidala Date: 2007-08-25 02:04:52 -0400 (Sat, 25 Aug 2007) New Revision: 1472 Modified: mvapich2/trunk/src/mpi/comm/Makefile.in mvapich2/trunk/src/mpi/comm/create_2level_comm.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/Makefile.in mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_shmem_coll.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/coll_shmem.h Log: fix for seg. 
fault for IMB-EXT tests with multi option Modified: mvapich2/trunk/src/mpi/comm/Makefile.in =================================================================== --- mvapich2/trunk/src/mpi/comm/Makefile.in 2007-08-23 04:39:21 UTC (rev 1471) +++ mvapich2/trunk/src/mpi/comm/Makefile.in 2007-08-25 06:04:52 UTC (rev 1472) @@ -21,7 +21,7 @@ PMPILIBNAME = @PMPILIBNAME@ CC = @CC@ -CFLAGS = @CFLAGS@ +CFLAGS = @CFLAGS@ -D_GNU_SOURCE C_COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS) LIBTOOL = @LIBTOOL@ CC_SHL = @CC_SHL@ Modified: mvapich2/trunk/src/mpi/comm/create_2level_comm.c =================================================================== --- mvapich2/trunk/src/mpi/comm/create_2level_comm.c 2007-08-23 04:39:21 UTC (rev 1471) +++ mvapich2/trunk/src/mpi/comm/create_2level_comm.c 2007-08-25 06:04:52 UTC (rev 1472) @@ -143,10 +143,10 @@ MPI_Comm_rank(comm_ptr->shmem_comm, &my_local_id); if (my_local_id == 0){ - pthread_mutex_lock(&shmem_coll->shmem_coll_lock); + pthread_spin_lock(&shmem_coll->shmem_coll_lock); shmem_coll->shmem_comm_count++; shmem_comm_count = shmem_coll->shmem_comm_count; - pthread_mutex_unlock(&shmem_coll->shmem_coll_lock); + pthread_spin_unlock(&shmem_coll->shmem_coll_lock); } MPI_Bcast (&shmem_comm_count, 1, MPI_INT, 0, comm_ptr->shmem_comm); Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/Makefile.in =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/Makefile.in 2007-08-23 04:39:21 UTC (rev 1471) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/Makefile.in 2007-08-25 06:04:52 UTC (rev 1472) @@ -13,7 +13,7 @@ MPILIBNAME = @MPILIBNAME@ CC = @CC@ -CFLAGS = @CFLAGS@ +CFLAGS = @CFLAGS@ -D_GNU_SOURCE C_COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CFLAGS) $(CPPFLAGS) LIBTOOL = @LIBTOOL@ CC_SHL = @CC_SHL@ Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_shmem_coll.c =================================================================== 
--- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_shmem_coll.c 2007-08-23 04:39:21 UTC (rev 1471) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_shmem_coll.c 2007-08-25 06:04:52 UTC (rev 1472) @@ -191,7 +191,7 @@ shmem_coll->root_complete_gather[j][i] = 1; } } - pthread_mutex_init(&shmem_coll->shmem_coll_lock,NULL); + pthread_spin_init(&shmem_coll->shmem_coll_lock,0); } fn_exit: Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/coll_shmem.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/coll_shmem.h 2007-08-23 04:39:21 UTC (rev 1471) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/coll_shmem.h 2007-08-25 06:04:52 UTC (rev 1472) @@ -21,7 +21,7 @@ #ifndef _COLL_SHMEM_ #define _COLL_SHMEM_ - +#include <pthread.h> /*********** Macro defines of local variables ************/ #define PID_CHAR_LEN 22 @@ -99,7 +99,7 @@ volatile int barrier_gather[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; volatile int barrier_bcast[SHMEM_COLL_NUM_COMM][SHMEM_COLL_NUM_PROCS]; volatile int shmem_comm_count; - pthread_mutex_t shmem_coll_lock; + pthread_spinlock_t shmem_coll_lock; /* the collective buffer */ char shmem_coll_buf; From chail at mvapich.cse.ohio-state.edu Sat Aug 25 21:23:29 2007 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Sat Aug 25 21:23:55 2007 Subject: [mvapich-commit] r1473 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl Message-ID: <200708260123.l7Q1NSsi015968@mvapich.cse.ohio-state.edu> Author: chail Date: 2007-08-25 21:23:27 -0400 (Sat, 25 Aug 2007) New Revision: 1473 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_channel_manager.c Log: Apply the one-sided patch to uDAPL to force the operations in order. 
Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_channel_manager.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_channel_manager.c 2007-08-25 06:04:52 UTC (rev 1472) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_channel_manager.c 2007-08-26 01:23:27 UTC (rev 1473) @@ -151,7 +151,11 @@ { return ((MPIDI_CH3_Pkt_put_t *)(buf->pheader))->seqnum; } - + case MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND: + { + return ((MPIDI_CH3_Pkt_rndv_clr_to_send_t *) + (buf->pheader))->seqnum; + } default: return PKT_NO_SEQ_NUM; } From koop at mvapich.cse.ohio-state.edu Sun Aug 26 20:05:36 2007 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Sun Aug 26 20:06:03 2007 Subject: [mvapich-commit] r1474 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma Message-ID: <200708270005.l7R05aOU003412@mvapich.cse.ohio-state.edu> Author: koop Date: 2007-08-26 20:05:35 -0400 (Sun, 26 Aug 2007) New Revision: 1474 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_progress.c Log: * Need to have packet types with sequence number in MPIDI_CH3I_Seq Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_progress.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_progress.c 2007-08-26 01:23:27 UTC (rev 1473) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_progress.c 2007-08-27 00:05:35 UTC (rev 1474) @@ -89,6 +89,7 @@ case MPIDI_CH3_PKT_PUT_RNDV: case MPIDI_CH3_PKT_ACCUMULATE_RNDV: case MPIDI_CH3_PKT_GET_RNDV: + case MPIDI_CH3_PKT_RMA_RNDV_CLR_TO_SEND: return 1; default: return 0; From chail at mvapich.cse.ohio-state.edu Mon Aug 27 12:31:32 2007 From: chail at mvapich.cse.ohio-state.edu (chail@mvapich.cse.ohio-state.edu) Date: Mon Aug 27 12:31:58 2007 Subject: [mvapich-commit] r1475 - 
mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl Message-ID: <200708271631.l7RGVW7f005520@mvapich.cse.ohio-state.edu> Author: chail Date: 2007-08-27 12:31:31 -0400 (Mon, 27 Aug 2007) New Revision: 1475 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/dreg.c mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_util.h Log: Don't abort if cannot register memory on Solaris, just return NULL and let the upper layer fallback to point-to-point based one-sided. Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/dreg.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/dreg.c 2007-08-27 00:05:35 UTC (rev 1474) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/dreg.c 2007-08-27 16:31:31 UTC (rev 1475) @@ -1069,7 +1069,9 @@ &d->memhandle.hndl, &d->memhandle.lkey, &d->memhandle.rkey, &reg_size, &reg_addr); +#ifndef SOLARIS CHECK_RETURN(ret, "cannot create lmr\n"); +#endif DEBUG_PRINT("register return mr %p, buf %p, len %d\n", mr, buf, len); return ret; } Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_util.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_util.h 2007-08-27 00:05:35 UTC (rev 1474) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_util.h 2007-08-27 16:31:31 UTC (rev 1475) @@ -71,7 +71,7 @@ #define CHECK_RETURN(ret, s) \ do { \ if (ret != DAT_SUCCESS) { \ - fprintf(stderr, "[%s:%d] error(%d): %s\n", \ + fprintf(stderr, "[%s:%d] error(%x): %s\n", \ __FILE__,__LINE__, ret, s); \ exit(1); \ } \ From koop at mvapich.cse.ohio-state.edu Tue Aug 28 16:56:30 2007 From: koop at mvapich.cse.ohio-state.edu (koop@mvapich.cse.ohio-state.edu) Date: Tue Aug 28 16:56:56 2007 Subject: [mvapich-commit] r1477 - mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma Message-ID: 
<200708282056.l7SKuUmN009054@mvapich.cse.ohio-state.edu> Author: koop Date: 2007-08-28 16:56:29 -0400 (Tue, 28 Aug 2007) New Revision: 1477 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c Log: * Make sure the correct length is given to Eager_Send so space left in the vbuf is correctly reported for coalescing Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-28 20:55:10 UTC (rev 1476) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c 2007-08-28 20:56:29 UTC (rev 1477) @@ -418,9 +418,17 @@ sreq->dev.iov_count, sreq->ch.iov_offset, sreq->dev.iov[0].MPID_IOV_LEN); - mpi_errno = - MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, - sizeof(MPIDI_CH3_Pkt_rndv_r3_data_t), &nb, &buf); + { + int i = 0, total_len = 0; + for (i = 0; i < n_iov; i++) { + total_len += (iov[i].MPID_IOV_LEN); + } + + mpi_errno = + MPIDI_CH3I_MRAILI_Eager_send(vc, iov, n_iov, + total_len, &nb, &buf); + } + DEBUG_PRINT("[istartmsgv] mpierr %d, nb %d\n", mpi_errno, nb); From santhana at mvapich.cse.ohio-state.edu Wed Aug 29 16:01:08 2007 From: santhana at mvapich.cse.ohio-state.edu (santhana@mvapich.cse.ohio-state.edu) Date: Wed Aug 29 16:01:33 2007 Subject: [mvapich-commit] r1480 - in mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src: gen2 udapl vapi Message-ID: <200708292001.l7TK18QV011984@mvapich.cse.ohio-state.edu> Author: santhana Date: 2007-08-29 16:01:06 -0400 (Wed, 29 Aug 2007) New Revision: 1480 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_param.h mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_param.h Log: increasing the number of dreg entries to 1100 from 1000 Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h 
=================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h 2007-08-28 21:22:29 UTC (rev 1479) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h 2007-08-29 20:01:06 UTC (rev 1480) @@ -124,7 +124,7 @@ MAX_NUM_PORTS* \ MAX_NUM_QP_PER_PORT) -#define RDMA_NDREG_ENTRIES (1000) +#define RDMA_NDREG_ENTRIES (1100) #define RDMA_VBUF_POOL_SIZE (512) #define RDMA_VBUF_SECONDARY_POOL_SIZE (128) #define RDMA_PREPOST_DEPTH (64) Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_param.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_param.h 2007-08-28 21:22:29 UTC (rev 1479) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_param.h 2007-08-29 20:01:06 UTC (rev 1480) @@ -61,7 +61,7 @@ extern long rdma_eagersize_1sc; #endif -#define RDMA_NDREG_ENTRIES (1000) +#define RDMA_NDREG_ENTRIES (1100) #define RDMA_PIN_POOL_SIZE (2*1024*1024) /* for small size message */ #define RDMA_DEFAULT_MAX_CQ_SIZE (6000) #define RDMA_DEFAULT_PORT (1) Modified: mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_param.h =================================================================== --- mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_param.h 2007-08-28 21:22:29 UTC (rev 1479) +++ mvapich2/trunk/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_param.h 2007-08-29 20:01:06 UTC (rev 1480) @@ -90,7 +90,7 @@ #define RDMA_INTEGER_POOL_SIZE (1024) #define MAX_NUM_HCAS (1) #define MAX_SUBCHANNELS (1) -#define VAPI_NDREG_ENTRIES (1000) +#define VAPI_NDREG_ENTRIES (1100) #define VAPI_VBUF_POOL_SIZE (5000) #define VAPI_VBUF_SECONDARY_POOL_SIZE (500) #define VAPI_PREPOST_DEPTH (80)