From 064bca74dbb55761260d41e9c065872ba8c148ad Mon Sep 17 00:00:00 2001 From: Soren Rasmussen Date: Wed, 1 May 2019 18:41:46 +0100 Subject: [PATCH] adding lots of good stuff --- base/comm/internals/psi_dswapdata.F90 | 168 +++++++++---------- base/comm/internals/psi_dswapdata_a.F90 | 212 ++++++++++++------------ base/comm/psb_dhalo.f90 | 58 +++---- 3 files changed, 213 insertions(+), 225 deletions(-) diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 5bb1dd6e..68da338c 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -1,5 +1,5 @@ ! debug out-of-bound -#define DBG_OOB +! #define DBG_OOB ! debug personal ! #define DBG_PER @@ -113,19 +113,19 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) #endif #include "scorep/SCOREP_User.inc" - integer(psb_ipk_), intent(in) :: flag - integer(psb_ipk_), intent(out) :: info - class(psb_d_base_vect_type) :: y - real(psb_dpk_) :: beta - real(psb_dpk_), target :: work(:) - type(psb_desc_type), target :: desc_a - integer(psb_ipk_), optional :: data + integer(psb_ipk_), intent(in) :: flag + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_vect_type) :: y + real(psb_dpk_) :: beta + real(psb_dpk_), target :: work(:) + type(psb_desc_type), target :: desc_a + integer(psb_ipk_), optional :: data ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx character(len=20) :: name - logical :: swap_persistent + logical :: swap_persistent, swap_nonpersistent, do_alltoallv info=psb_success_ name='psi_swap_datav' @@ -136,7 +136,9 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) icomm = desc_a%get_mpic() ! TODO: get_mpic should be used to get dist_graph_comm, but for now this works swap_persistent = iand(flag,psb_swap_persistent_) /= 0 - if (swap_persistent) then + swap_nonpersistent = iand(flag,psb_swap_nonpersistent_) /= 0 + do_alltoallv = swap_persistent .or. swap_nonpersistent + if (do_alltoallv) then if (allocated(desc_a%dist_graph_comm)) then ! print *, "desc_a%dist_graph_comm", desc_a%dist_graph_comm icomm = desc_a%dist_graph_comm @@ -231,13 +233,14 @@ subroutine psi_dswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx, & & err_act, i, j, ii, ri, si, idx_pt, totsnd_, totrcv_,& & snd_pt, rcv_pt, pnti, n, ierr logical :: swap_mpi, swap_sync, swap_send, swap_recv,& - & albf,do_send,do_recv, swap_persistent, do_persistent + & albf,do_send,do_recv, & + do_alltoallv, swap_persistent, swap_nonpersistent logical, parameter :: usersend=.false., debug=.false. character(len=20) :: name !remove integer :: status(MPI_STATUS_SIZE) - real :: start_t, finish_t + real(psb_dpk_) :: start_t, finish_t, start_comm_t, end_comm_t logical :: weight real(psb_dpk_) :: tmp ! score-p declaration @@ -260,14 +263,15 @@ subroutine psi_dswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx, & endif n=1 - swap_mpi = iand(flag,psb_swap_mpi_) /= 0 - swap_sync = iand(flag,psb_swap_sync_) /= 0 - swap_send = iand(flag,psb_swap_send_) /= 0 - swap_recv = iand(flag,psb_swap_recv_) /= 0 - swap_persistent = iand(flag,psb_swap_persistent_) /= 0 + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + swap_persistent = iand(flag,psb_swap_persistent_) /= 0 + swap_nonpersistent = iand(flag,psb_swap_nonpersistent_) /= 0 do_send = swap_mpi .or. swap_sync .or. swap_send do_recv = swap_mpi .or. swap_sync .or. swap_recv - do_persistent = swap_persistent + do_alltoallv = swap_persistent .or. swap_nonpersistent totrcv_ = totrcv * n totsnd_ = totsnd * n @@ -276,19 +280,17 @@ subroutine psi_dswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx, & ! TODO check do_persistent twice, here and calling psi_dswapdata_vect, ! redudent but needed for now - if (do_persistent) then - ! if not allocated, allocate buffers and create request + if (do_alltoallv) then if (.not. allocated(y%p)) then allocate(y%p) end if if (.not. allocated(y%p%sndbuf)) then + ! allocate time counters + allocate(y%p%alltoall_comm_time, y%p%total_time) + y%p%alltoall_comm_time = 0 + ! allocate buffers allocate(y%p%sndbuf(totsnd), y%p%rcvbuf(totrcv)) allocate(y%p%rcv_count, y%p%snd_count) -#ifdef DBG_OOB ! debug out-of-bound - y%p%sndbuf = 0 - y%p%rcvbuf = me + 10 -#endif - ! get number of neighbors and graph so we know who to communicate with call MPI_Dist_graph_neighbors_count(icomm, y%p%rcv_count, & y%p%snd_count, weight, ierr) ! should weight go into psb_d_persis_vect_type?? @@ -339,49 +341,41 @@ subroutine psi_dswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx, & pnti = pnti + nerv + nesd + 3 end do -#ifdef DBG_PER - print *, me, ":totxch=", totxch,"totsnd", totsnd, "totrcv", totrcv - print *, me, ": rcv_count = ", y%p%rcv_count, "snd_count = ", y%p%snd_count - print *, me, ": y%p%rcv_from =", y%p%rcv_from, "y%p%snd_to =", y%p%snd_to - print *, me, ":y%p%snd_counts", y%p%snd_counts, "y%p%snd_displs = ",y%p%snd_displs, & - "y%p%rcv_counts", y%p%rcv_counts, "y%p%rcv_displs",y%p%rcv_displs -#endif - - allocate(y%p%init_request) - y%p%init_request = -1 - - ! SCOREP_USER_REGION_BEGIN(MPIX_Neighbor_alltoallv_init_region, & - ! "MPIX_Neighbor_alltoallv_init", SCOREP_USER_REGION_TYPE_FUNCTION) - ! SCOREP_USER_REGION_BEGIN(artless_test, & - ! "foo", SCOREP_USER_REGION_TYPE_COMMON) - call MPI_Barrier(MPI_COMM_WORLD,ierr) - call cpu_time(start_t) - call MPIX_Neighbor_alltoallv_init(y%p%sndbuf, y%p%snd_counts, y%p%snd_displs, & - MPI_DOUBLE_PRECISION, y%p%rcvbuf, y%p%rcv_counts, y%p%rcv_displs, & - MPI_DOUBLE_PRECISION, icomm, MPI_INFO_NULL, & - y%p%init_request, ierr) - ! SCOREP_USER_REGION_END(artless_test) - ! SCOREP_USER_REGION_END(MPIX_Neighbor_alltoallv_init_region) - call cpu_time(finish_t) - allocate(y%p%comm_create_time) - y%p%comm_create_time = finish_t - start_t - ! print *,"cpu time of MPIX_Neighbor_alltoallv_init is", finish_t - start_t - - if (ierr .ne. 0) then - print *, "ERROR: MPIX_Neighbor_alltoallvinit ierr = ", ierr - goto 9999 +! #ifdef DBG_PER + ! print *, me, ": totxch", totxch,"total to send", totsnd, "total to recv", totrcv + ! print *, me, ": rcv_count = ", y%p%rcv_count, "snd_count = ", y%p%snd_count + ! print *, me, ":y%p%snd_displs = ",y%p%snd_displs, & + ! "y%p%rcv_displs",y%p%rcv_displs +! #endif + + ! ---------------------------- + ! print *, me, ": totsnd=",totsnd ,"y%p%snd_to =", y%p%snd_to, & + ! "y%p%snd_counts = ", y%p%snd_counts + ! print *, me, ": totrcv=",totrcv,"y%p%rcv_from =", y%p%rcv_from, & + ! "y%p%rcv_counts = ", y%p%rcv_counts + ! ---------------------------- + + + ! ---------------------------- + ! Persistant Request Creation + ! ---------------------------- + if (swap_persistent) then + allocate(y%p%init_request) + y%p%init_request = -1 + start_t = MPI_Wtime() + call MPIX_Neighbor_alltoallv_init(y%p%sndbuf, y%p%snd_counts, y%p%snd_displs, & + MPI_DOUBLE_PRECISION, y%p%rcvbuf, y%p%rcv_counts, y%p%rcv_displs, & + MPI_DOUBLE_PRECISION, icomm, MPI_INFO_NULL, & + y%p%init_request, ierr) + finish_t = MPI_Wtime() + allocate(y%p%request_create_time) + y%p%request_create_time = finish_t - start_t end if - - ! ARTLESS: SENDBUF LOOKS OK HERE! - ! call MPI_Barrier(MPI_COMM_WORLD, ierr) - ! write(*,'(I2 A)', advance='no') me, ": sndbuf = " - ! print '(104 F4.0)', y%p%sndbuf - ! call MPI_Barrier(MPI_COMM_WORLD, ierr) - -#ifdef DBG_PER - print* ,me, ": end MPIX_Neighbor_alltoallv_init" -#endif - + ! ---------------------------- + ! if (ierr .ne. 0) then + ! print *, "ERROR: MPIX_Neighbor_alltoallvinit ierr = ", ierr + ! goto 9999 + ! end if else ! send and recv buffers exist, need to pack send buffer si = 1 ! sndbuf index pnti = 1 @@ -405,38 +399,24 @@ subroutine psi_dswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx, & end do end if ! end packing phase - ! --- start communication -#ifdef DBG_PER - if (.not. allocated(y%p%init_request)) then - print *, "error: y%p%init_request should be allocated" - goto 9999 + ! ---- start communication ---- + start_comm_t = MPI_Wtime() + ! ---- if persistant ---- + if (swap_persistent) then + call MPI_Start(y%p%init_request, ierr) + call MPI_Wait(y%p%init_request, status, ierr) end if -#endif - call MPI_Start(y%p%init_request, ierr) -#ifdef DBG_PER - if (ierr .ne. 0) then - print *, "ERROR: rank ",me,"has MPI_Start status(MPI_ERROR) = ", & - status(MPI_ERROR), "and ierr = ", ierr - goto 9999 - end if -#endif - - call MPI_Wait(y%p%init_request, status, ierr) -#ifdef DBG_PER - if (ierr .ne. 0) then - print *, "ERROR: rank ",me,"has MPI_Wait status(MPI_ERROR) = ", & - status(MPI_ERROR), "and ierr = ", ierr - goto 9999 + ! ---- if non-persistent ---- + if (swap_nonpersistent) then + call MPI_Neighbor_alltoallv(y%p%sndbuf, y%p%snd_counts, y%p%snd_displs, & + MPI_DOUBLE_PRECISION, y%p%rcvbuf, y%p%rcv_counts, y%p%rcv_displs, & + MPI_DOUBLE_PRECISION, icomm, ierr) end if -#endif + end_comm_t = MPI_Wtime() - start_comm_t + y%p%alltoall_comm_time = y%p%alltoall_comm_time + end_comm_t - ! write(*,'(I2 A)', advance='no') me, "::::: rcvbuf =" - ! print '(104 F4.0)', y%p%rcvbuf ! ----scatter/unpack buffer---- -#ifdef DBG_PER - print*, me, ": begin scatter/unpack buffer" -#endif pnti = 1 rcv_pt = 1 do i=1, totxch diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index ec330ef7..f5d174ab 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,58 +27,58 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! ! File: psi_dswapdata.F90 ! ! Subroutine: psi_dswapdatam -! Does the data exchange among processes. Essentially this is doing -! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out -! application environment. All the variants have the same structure +! Does the data exchange among processes. Essentially this is doing +! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but +! it is capable of pruning empty exchanges, which are very likely in out +! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) ! D real(psb_dpk_) ! C complex(psb_spk_) ! Z complex(psb_dpk_) -! Basically the operation is as follows: on each process, we identify +! Basically the operation is as follows: on each process, we identify ! sections SND(Y) and RCV(Y); then we do a send on (PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. -! Thus: for halo data exchange, the receive section is confined in the -! halo indices, and BETA=0, whereas for overlap exchange the receive section +! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y +! but only on the elements involved in the UNPACK operation. +! Thus: for halo data exchange, the receive section is confined in the +! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. -! -! Arguments: -! flag - integer Choose the algorithm for data exchange: -! this is chosen through bit fields. +! +! Arguments: +! flag - integer Choose the algorithm for data exchange: +! this is chosen through bit fields. ! swap_mpi = iand(flag,psb_swap_mpi_) /= 0 ! swap_sync = iand(flag,psb_swap_sync_) /= 0 ! swap_send = iand(flag,psb_swap_send_) /= 0 ! swap_recv = iand(flag,psb_swap_recv_) /= 0 ! if (swap_mpi): use underlying MPI_ALLTOALLV. -! if (swap_sync): use PSB_SND and PSB_RCV in +! if (swap_sync): use PSB_SND and PSB_RCV in ! synchronized pairs -! if (swap_send .and. swap_recv): use mpi_irecv +! if (swap_send .and. swap_recv): use mpi_irecv ! and mpi_send -! if (swap_send): use psb_snd (but need another +! if (swap_send): use psb_snd (but need another ! call with swap_recv to complete) -! if (swap_recv): use psb_rcv (completing a +! if (swap_recv): use psb_rcv (completing a ! previous call with swap_send) ! ! -! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:,:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! n - integer Number of columns in Y +! beta - X Choose overwrite or sum. +! y(:,:) - X The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - X Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data ! default psb_comm_halo_ ! psb_comm_halo_ use halo_index -! psb_comm_ext_ use ext_index +! psb_comm_ext_ use ext_index ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! @@ -108,6 +108,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name + print *, "psi_dswapdata_a.F90:psi_dswapdatam" info=psb_success_ name='psi_swap_data' @@ -115,14 +116,14 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) ictxt = desc_a%get_context() icomm = desc_a%get_mpic() - call psb_info(ictxt,me,np) + call psb_info(ictxt,me,np) if (np == -1) then info=psb_err_context_error_ call psb_errpush(info,name) goto 9999 endif - if (.not.psb_is_asb_desc(desc_a)) then + if (.not.psb_is_asb_desc(desc_a)) then info=psb_err_invalid_cd_state_ call psb_errpush(info,name) goto 9999 @@ -134,8 +135,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) data_ = psb_comm_halo_ end if - call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) - if (info /= psb_success_) then + call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if @@ -189,6 +190,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & volatile :: sndbuf, rcvbuf #endif character(len=20) :: name + print *, "psi_dswapdata_a.F90:psi_dswapidxm" info=psb_success_ name='psi_swap_data' @@ -196,7 +198,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & ictxt = iictxt icomm = iicomm - call psb_info(ictxt,me,np) + call psb_info(ictxt,me,np) if (np == -1) then info=psb_err_context_error_ call psb_errpush(info,name) @@ -214,18 +216,18 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & totrcv_ = totrcv * n totsnd_ = totsnd * n - if (swap_mpi) then + if (swap_mpi) then allocate(sdsz(0:np-1), rvsz(0:np-1), bsdidx(0:np-1),& & brvidx(0:np-1), rvhd(0:np-1), sdhd(0:np-1), prcid(0:np-1),& - & stat=info) + & stat=info) if(info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 end if rvhd(:) = mpi_request_null - sdsz(:) = 0 - rvsz(:) = 0 + sdsz(:) = 0 + rvsz(:) = 0 ! prepare info for communications @@ -250,7 +252,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & end do else - allocate(rvhd(totxch),prcid(totxch),stat=info) + allocate(rvhd(totxch),prcid(totxch),stat=info) if(info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -324,8 +326,8 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,& & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) - else if (proc_to_comm == me) then - if (nesd /= nerv) then + else if (proc_to_comm == me) then + if (nesd /= nerv) then write(psb_err_unit,*) & & 'Fatal error in swapdata: mismatch on self send',& & nerv,nesd @@ -349,8 +351,8 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & proc_to_comm = idx(pnti+psb_proc_id_) nerv = idx(pnti+psb_n_elem_recv_) nesd = idx(pnti+nerv+psb_n_elem_send_) - call psb_get_rank(prcid(i),ictxt,proc_to_comm) - if ((nerv>0).and.(proc_to_comm /= me)) then + call psb_get_rank(prcid(i),ictxt,proc_to_comm) + if ((nerv>0).and.(proc_to_comm /= me)) then p2ptag = psb_double_swap_tag call mpi_irecv(rcvbuf(rcv_pt),n*nerv,& & psb_mpi_r_dpk_,prcid(i),& @@ -374,8 +376,8 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & nesd = idx(pnti+nerv+psb_n_elem_send_) p2ptag = psb_double_swap_tag - if ((nesd>0).and.(proc_to_comm /= me)) then - if (usersend) then + if ((nesd>0).and.(proc_to_comm /= me)) then + if (usersend) then call mpi_rsend(sndbuf(snd_pt),n*nesd,& & psb_mpi_r_dpk_,prcid(i),& & p2ptag,icomm,iret) @@ -405,7 +407,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & nesd = idx(pnti+nerv+psb_n_elem_send_) p2ptag = psb_double_swap_tag - + if ((proc_to_comm /= me).and.(nerv>0)) then call mpi_wait(rvhd(i),p2pstat,iret) if(iret /= mpi_success) then @@ -413,8 +415,8 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & call psb_errpush(info,name,m_err=(/iret/)) goto 9999 end if - else if (proc_to_comm == me) then - if (nesd /= nerv) then + else if (proc_to_comm == me) then + if (nesd /= nerv) then write(psb_err_unit,*)& & 'Fatal error in swapdata: mismatch on self send', & & nerv,nesd @@ -460,7 +462,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & end if - if (do_recv) then + if (do_recv) then pnti = 1 snd_pt = 1 @@ -471,7 +473,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & nesd = idx(pnti+nerv+psb_n_elem_send_) idx_pt = 1+pnti+psb_n_elem_recv_ call psi_sct(nerv,n,idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) rcv_pt = rcv_pt + n*nerv snd_pt = snd_pt + n*nesd pnti = pnti + nerv + nesd + 3 @@ -480,7 +482,7 @@ subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & end if - if (swap_mpi) then + if (swap_mpi) then deallocate(sdsz,rvsz,bsdidx,brvidx,rvhd,prcid,sdhd,& & stat=info) else @@ -507,52 +509,52 @@ end subroutine psi_dswapidxm ! ! ! Subroutine: psi_dswapdatav -! Does the data exchange among processes. Essentially this is doing -! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but -! it is capable of pruning empty exchanges, which are very likely in out -! application environment. All the variants have the same structure +! Does the data exchange among processes. Essentially this is doing +! a variable all-to-all data exchange (ALLTOALLV in MPI parlance), but +! it is capable of pruning empty exchanges, which are very likely in out +! application environment. All the variants have the same structure ! In all these subroutines X may be: I Integer ! S real(psb_spk_) ! D real(psb_dpk_) ! C complex(psb_spk_) ! Z complex(psb_dpk_) -! Basically the operation is as follows: on each process, we identify +! Basically the operation is as follows: on each process, we identify ! sections SND(Y) and RCV(Y); then we do a SEND(PACK(SND(Y))); -! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y -! but only on the elements involved in the UNPACK operation. -! Thus: for halo data exchange, the receive section is confined in the -! halo indices, and BETA=0, whereas for overlap exchange the receive section +! then we receive, and we do an update with Y = UNPACK(RCV(Y)) + BETA * Y +! but only on the elements involved in the UNPACK operation. +! Thus: for halo data exchange, the receive section is confined in the +! halo indices, and BETA=0, whereas for overlap exchange the receive section ! is scattered in the owned indices, and BETA=1. -! -! Arguments: -! flag - integer Choose the algorithm for data exchange: -! this is chosen through bit fields. +! +! Arguments: +! flag - integer Choose the algorithm for data exchange: +! this is chosen through bit fields. ! swap_mpi = iand(flag,psb_swap_mpi_) /= 0 ! swap_sync = iand(flag,psb_swap_sync_) /= 0 ! swap_send = iand(flag,psb_swap_send_) /= 0 ! swap_recv = iand(flag,psb_swap_recv_) /= 0 ! if (swap_mpi): use underlying MPI_ALLTOALLV. -! if (swap_sync): use PSB_SND and PSB_RCV in +! if (swap_sync): use PSB_SND and PSB_RCV in ! synchronized pairs -! if (swap_send .and. swap_recv): use mpi_irecv +! if (swap_send .and. swap_recv): use mpi_irecv ! and mpi_send -! if (swap_send): use psb_snd (but need another +! if (swap_send): use psb_snd (but need another ! call with swap_recv to complete) -! if (swap_recv): use psb_rcv (completing a +! if (swap_recv): use psb_rcv (completing a ! previous call with swap_send) ! ! -! n - integer Number of columns in Y -! beta - X Choose overwrite or sum. -! y(:) - X The data area -! desc_a - type(psb_desc_type). The communication descriptor. -! work(:) - X Buffer space. If not sufficient, will do +! n - integer Number of columns in Y +! beta - X Choose overwrite or sum. +! y(:) - X The data area +! desc_a - type(psb_desc_type). The communication descriptor. +! work(:) - X Buffer space. If not sufficient, will do ! our own internal allocation. ! info - integer. return code. ! data - integer which list is to be used to exchange data ! default psb_comm_halo_ ! psb_comm_halo_ use halo_index -! psb_comm_ext_ use ext_index +! psb_comm_ext_ use ext_index ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! @@ -582,6 +584,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) character(len=20) :: name + print *, "psi_dswapdata_a.F90:psi_dswapdatav" info=psb_success_ name='psi_swap_datav' @@ -589,14 +592,14 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) ictxt = desc_a%get_context() icomm = desc_a%get_mpic() - call psb_info(ictxt,me,np) + call psb_info(ictxt,me,np) if (np == -1) then info=psb_err_context_error_ call psb_errpush(info,name) goto 9999 endif - if (.not.psb_is_asb_desc(desc_a)) then + if (.not.psb_is_asb_desc(desc_a)) then info=psb_err_invalid_cd_state_ call psb_errpush(info,name) goto 9999 @@ -608,8 +611,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) data_ = psb_comm_halo_ end if - call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) - if (info /= psb_success_) then + call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if @@ -629,13 +632,13 @@ end subroutine psi_dswapdatav ! ! ! Subroutine: psi_dswapdataidxv -! Does the data exchange among processes. -! +! Does the data exchange among processes. +! ! The real workhorse: the outer routines will only choose the index list -! this one takes the index list and does the actual exchange. -! -! -! +! this one takes the index list and does the actual exchange. +! +! +! subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & & totxch,totsnd,totrcv,work,info) @@ -674,6 +677,7 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & volatile :: sndbuf, rcvbuf #endif character(len=20) :: name + print *, "psi_dswapdata_a.F90:psi_dswapidxv" info=psb_success_ name='psi_swap_datav' @@ -681,7 +685,7 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & ictxt = iictxt icomm = iicomm - call psb_info(ictxt,me,np) + call psb_info(ictxt,me,np) if (np == -1) then info=psb_err_context_error_ call psb_errpush(info,name) @@ -699,26 +703,27 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & totrcv_ = totrcv * n totsnd_ = totsnd * n - if (swap_mpi) then + if (swap_mpi) then allocate(sdsz(0:np-1), rvsz(0:np-1), bsdidx(0:np-1),& & brvidx(0:np-1), rvhd(0:np-1), sdhd(0:np-1), prcid(0:np-1),& - & stat=info) + & stat=info) if(info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 end if rvhd(:) = mpi_request_null - sdsz(:) = 0 - rvsz(:) = 0 + sdsz(:) = 0 + rvsz(:) = 0 ! prepare info for communications pnti = 1 snd_pt = 1 rcv_pt = 1 - do i=1, totxch + do i=1, totxch ! artless: this is what i want to do MPI_V for MPI_COMM_WORLD proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) nesd = idx(pnti+nerv+psb_n_elem_send_) call psb_get_rank(prcid(proc_to_comm),ictxt,proc_to_comm) @@ -732,11 +737,11 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & rcv_pt = rcv_pt + nerv snd_pt = snd_pt + nesd pnti = pnti + nerv + nesd + 3 - + print *, "---ARTLESS rcv_pt = ", rcv_pt, "snd_pt = ", snd_pt end do else - allocate(rvhd(totxch),prcid(totxch),stat=info) + allocate(rvhd(totxch),prcid(totxch),stat=info) if(info /= psb_success_) then call psb_errpush(psb_err_alloc_dealloc_,name) goto 9999 @@ -771,8 +776,9 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & idx_pt = 1+pnti+nerv+psb_n_elem_send_ call psi_gth(nesd,idx(idx_pt:idx_pt+nesd-1),& & y,sndbuf(snd_pt:snd_pt+nesd-1)) - snd_pt = snd_pt + nesd + snd_pt = snd_pt + nesd pnti = pnti + nerv + nesd + 3 + end do end if @@ -811,7 +817,7 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & if (nesd>0) call psb_snd(ictxt,& & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) else if (proc_to_comm == me) then - if (nesd /= nerv) then + if (nesd /= nerv) then write(psb_err_unit,*) & & 'Fatal error in swapdata: mismatch on self send', & & nerv,nesd @@ -835,8 +841,8 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & nerv = idx(pnti+psb_n_elem_recv_) nesd = idx(pnti+nerv+psb_n_elem_send_) - call psb_get_rank(prcid(i),ictxt,proc_to_comm) - if ((nerv>0).and.(proc_to_comm /= me)) then + call psb_get_rank(prcid(i),ictxt,proc_to_comm) + if ((nerv>0).and.(proc_to_comm /= me)) then p2ptag = psb_double_swap_tag call mpi_irecv(rcvbuf(rcv_pt),nerv,& & psb_mpi_r_dpk_,prcid(i),& @@ -861,8 +867,8 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & p2ptag = psb_double_swap_tag - if ((nesd>0).and.(proc_to_comm /= me)) then - if (usersend) then + if ((nesd>0).and.(proc_to_comm /= me)) then + if (usersend) then call mpi_rsend(sndbuf(snd_pt),nesd,& & psb_mpi_r_dpk_,prcid(i),& & p2ptag,icomm,iret) @@ -898,8 +904,8 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & call psb_errpush(info,name,m_err=(/iret/)) goto 9999 end if - else if (proc_to_comm == me) then - if (nesd /= nerv) then + else if (proc_to_comm == me) then + if (nesd /= nerv) then write(psb_err_unit,*) & & 'Fatal error in swapdata: mismatch on self send', & & nerv,nesd @@ -944,7 +950,7 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & end if - if (do_recv) then + if (do_recv) then pnti = 1 snd_pt = 1 @@ -963,7 +969,7 @@ subroutine psi_dswapidxv(iictxt,iicomm,flag,beta,y,idx, & end if - if (swap_mpi) then + if (swap_mpi) then deallocate(sdsz,rvsz,bsdidx,brvidx,rvhd,prcid,sdhd,& & stat=info) else diff --git a/base/comm/psb_dhalo.f90 b/base/comm/psb_dhalo.f90 index 005c59b2..ece95e89 100644 --- a/base/comm/psb_dhalo.f90 +++ b/base/comm/psb_dhalo.f90 @@ -1,9 +1,9 @@ -! +! ! Parallel Sparse BLAS version 3.5 ! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! +! Salvatore Filippone +! Alfredo Buttari +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions ! are met: @@ -15,7 +15,7 @@ ! 3. The name of the PSBLAS group or the names of its contributors may ! not be used to endorse or promote products derived from this ! software without specific written permission. -! +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -27,27 +27,27 @@ ! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. -! -! +! +! ! File: psb_dhalo.f90 ! ! Subroutine: psb_dhalom -! This subroutine performs the exchange of the halo elements in a +! This subroutine performs the exchange of the halo elements in a ! distributed dense matrix between all the processes. ! ! Arguments: ! x - real,dimension(:,:). The local part of the dense matrix. ! desc_a - type(psb_desc_type). The communication descriptor. ! info - integer. Return code -! jx - integer(optional). The starting column of the global matrix. -! ik - integer(optional). The number of columns to gather. +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. ! work - real(optional). Work area. ! tran - character(optional). Transpose exchange. ! mode - integer(optional). Communication mode (see Swapdata) ! data - integer Which index list in desc_a should be used ! to retrieve rows, default psb_comm_halo_ ! psb_comm_halo_ use halo_index -! psb_comm_ext_ use ext_index +! psb_comm_ext_ use ext_index ! psb_comm_ovrl_ use ovrl_index ! psb_comm_mov_ use ovr_mst_idx ! @@ -74,6 +74,9 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) character(len=20) :: name, ch_err logical :: aliw + ! print *, "======ARTLESS======" + ! print *, "psb_dhalo.f90:78:psb_dhalo_vect" + name='psb_dhalov' info=psb_success_ call psb_erractionsave(err_act) @@ -83,7 +86,7 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) ictxt=desc_a%get_context() - ! check on blacs grid + ! check on blacs grid call psb_info(ictxt, me, np) if (np == -1) then info = psb_err_context_error_ @@ -91,7 +94,7 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) goto 9999 endif - if (.not.allocated(x%v)) then + if (.not.allocated(x%v)) then info = psb_err_invalid_vect_state_ call psb_errpush(info,name) goto 9999 @@ -104,17 +107,17 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) n = desc_a%get_global_cols() nrow = desc_a%get_local_rows() - if (present(tran)) then + if (present(tran)) then tran_ = psb_toupper(tran) else tran_ = 'N' endif - if (present(data)) then + if (present(data)) then data_ = data else data_ = psb_comm_halo_ endif - if (present(mode)) then + if (present(mode)) then imode = mode else imode = IOR(psb_swap_send_,psb_swap_recv_) @@ -166,14 +169,14 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) ! exchange halo elements if(tran_ == 'N') then call psi_swapdata(imode,dzero,x%v,& - & desc_a,iwork,info,data=data_) + & desc_a,iwork,info,data=data_) ! artless: this gets called, desc_a has comm else if((tran_ == 'T').or.(tran_ == 'C')) then call psi_swaptran(imode,done,x%v,& & desc_a,iwork,info) else info = psb_err_internal_error_ call psb_errpush(info,name,a_err='invalid tran') - goto 9999 + goto 9999 end if if (info /= psb_success_) then @@ -186,7 +189,7 @@ subroutine psb_dhalo_vect(x,desc_a,info,work,tran,mode,data) nullify(iwork) call psb_erractionrestore(err_act) - return + return 9999 call psb_error_handler(ione*ictxt,err_act) @@ -214,7 +217,7 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) character :: tran_ character(len=20) :: name, ch_err logical :: aliw - + print *, "psb_dhalo.f90:psb_dhalo_multivect" ! artless name='psb_dhalov' info=psb_success_ call psb_erractionsave(err_act) @@ -224,7 +227,7 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) ictxt=desc_a%get_context() - ! check on blacs grid + ! check on blacs grid call psb_info(ictxt, me, np) if (np == -1) then info = psb_err_context_error_ @@ -232,7 +235,7 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) goto 9999 endif - if (.not.allocated(x%v)) then + if (.not.allocated(x%v)) then info = psb_err_invalid_vect_state_ call psb_errpush(info,name) goto 9999 @@ -245,17 +248,17 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) n = desc_a%get_global_cols() nrow = desc_a%get_local_rows() - if (present(tran)) then + if (present(tran)) then tran_ = psb_toupper(tran) else tran_ = 'N' endif - if (present(data)) then + if (present(data)) then data_ = data else data_ = psb_comm_halo_ endif - if (present(mode)) then + if (present(mode)) then imode = mode else imode = IOR(psb_swap_send_,psb_swap_recv_) @@ -314,7 +317,7 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) else info = psb_err_internal_error_ call psb_errpush(info,name,a_err='invalid tran') - goto 9999 + goto 9999 end if if (info /= psb_success_) then @@ -327,10 +330,9 @@ subroutine psb_dhalo_multivect(x,desc_a,info,work,tran,mode,data) nullify(iwork) call psb_erractionrestore(err_act) - return + return 9999 call psb_error_handler(ione*ictxt,err_act) return end subroutine psb_dhalo_multivect -