diff --git a/src/internals/psi_dswapdata.f90 b/src/internals/psi_dswapdata.f90 index edc6e9f7..6b7e41d6 100644 --- a/src/internals/psi_dswapdata.f90 +++ b/src/internals/psi_dswapdata.f90 @@ -28,41 +28,41 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) character(len=20) :: name, ch_err interface psi_gth - subroutine psi_dgthm(n,k,idx,x,y) - integer :: n, k, idx(:) - real(kind(1.d0)) :: x(:,:), y(:) - end subroutine psi_dgthm - subroutine psi_dgthv(n,idx,x,y) - integer :: n, idx(:) - real(kind(1.d0)) :: x(:), y(:) - end subroutine psi_dgthv - subroutine psi_igthm(n,k,idx,x,y) - integer :: n, k, idx(:) - integer :: x(:,:), y(:) - end subroutine psi_igthm - subroutine psi_igthv(n,idx,x,y) - integer :: n, idx(:) - integer :: x(:), y(:) - end subroutine psi_igthv + subroutine psi_dgthm(n,k,idx,x,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: x(:,:), y(:) + end subroutine psi_dgthm + subroutine psi_dgthv(n,idx,x,y) + integer :: n, idx(:) + real(kind(1.d0)) :: x(:), y(:) + end subroutine psi_dgthv + subroutine psi_igthm(n,k,idx,x,y) + integer :: n, k, idx(:) + integer :: x(:,:), y(:) + end subroutine psi_igthm + subroutine psi_igthv(n,idx,x,y) + integer :: n, idx(:) + integer :: x(:), y(:) + end subroutine psi_igthv end interface interface psi_sct - subroutine psi_dsctm(n,k,idx,x,beta,y) - integer :: n, k, idx(:) - real(kind(1.d0)) :: beta, x(:), y(:,:) - end subroutine psi_dsctm - subroutine psi_dsctv(n,idx,x,beta,y) - integer :: n, idx(:) - real(kind(1.d0)) :: beta, x(:), y(:) - end subroutine psi_dsctv - subroutine psi_isctm(n,k,idx,x,beta,y) - integer :: n, k, idx(:) - integer :: beta, x(:), y(:,:) - end subroutine psi_isctm - subroutine psi_isctv(n,idx,x,beta,y) - integer :: n, idx(:) - integer :: beta, x(:), y(:) - end subroutine psi_isctv + subroutine psi_dsctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:,:) + end subroutine psi_dsctm + subroutine psi_dsctv(n,idx,x,beta,y) + integer :: n, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:) + end subroutine psi_dsctv + subroutine psi_isctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + integer :: beta, x(:), y(:,:) + end subroutine psi_isctm + subroutine psi_isctv(n,idx,x,beta,y) + integer :: n, idx(:) + integer :: beta, x(:), y(:) + end subroutine psi_isctv end interface info = 0 @@ -72,14 +72,14 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) icontxt=desc_a%matrix_data(psb_ctxt_) call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) if (nprow == -1) then - info = 2010 - call psb_errpush(info,name) - goto 9999 + info = 2010 + call psb_errpush(info,name) + goto 9999 else if (npcol /= 1) then - info = 2030 - int_err(1) = npcol - call psb_errpush(info,name) - goto 9999 + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 endif call blacs_get(icontxt,10,icomm) @@ -88,8 +88,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& & ptp(0:nprow-1), stat=info) if(info.ne.0) then - call psb_errpush(4000,name) - goto 9999 + call psb_errpush(4000,name) + goto 9999 end if swap_mpi = iand(flag,psb_swap_mpi_) .ne.0 @@ -98,15 +98,15 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) swap_recv = iand(flag,psb_swap_recv_).ne.0 if(present(data)) then - if(data.eq.psb_comm_halo_) then - d_idx => desc_a%halo_index - else if(data.eq.psb_comm_ovr_) then - d_idx => desc_a%ovrlap_index - else - d_idx => desc_a%halo_index - end if + if(data.eq.psb_comm_halo_) then + d_idx => desc_a%halo_index + else if(data.eq.psb_comm_ovr_) then + d_idx => desc_a%ovrlap_index + else + d_idx => desc_a%halo_index + end if else - d_idx => desc_a%halo_index + d_idx => desc_a%halo_index end if idxs = 1 @@ -118,252 +118,252 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) ! prepare info for communications proc_to_comm = d_idx(point_to_proc+psb_proc_id_) do while (proc_to_comm.ne.-1) - if(proc_to_comm .ne. myrow) totxch = totxch+1 - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) - ptp(proc_to_comm) = point_to_proc + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc - brvidx(proc_to_comm) = idxr - rvsz(proc_to_comm) = n*nerv - idxr = idxr+rvsz(proc_to_comm) + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) - bsdidx(proc_to_comm) = idxs - sdsz(proc_to_comm) = n*nesd - idxs = idxs+sdsz(proc_to_comm) + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) end do if((idxr+idxs).lt.size(work)) then - sndbuf => work(1:idxs) - rcvbuf => work(idxs+1:idxs+idxr) + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) else - allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) - if(info.ne.0) then - call psb_errpush(4000,name) - goto 9999 - end if + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if end if ! Case SWAP_MPI if(swap_mpi) then - - ! gather elements into sendbuffer for swapping - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + + call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_double_precision,rcvbuf,rvsz,& + & brvidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself idx_pt = point_to_proc+nerv+psb_elem_send_ snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - ! swap elements using mpi_alltoallv - call mpi_alltoallv(sndbuf,sdsz,bsdidx,& - & mpi_double_precision,rcvbuf,rvsz,& - & brvidx,mpi_double_precision,icomm,iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if - - ! scatter elements from receivebuffer after swapping - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then idx_pt = point_to_proc+psb_elem_recv_ rcv_pt = brvidx(proc_to_comm) call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - else if (swap_sync) then - - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if (proc_to_comm .lt. myrow) then - ! First I send - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) - call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) - ! Then I receive - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - else if (proc_to_comm .gt. myrow) then - ! First I receive - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - ! Then I send - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) - call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) - else if (proc_to_comm .eq. myrow) then - ! I send to myself - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) - end if - - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - if(proc_to_comm.ne.myrow) then - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) - else - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) - end if - - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do else if (swap_send .and. swap_recv) then - ! First I post all the non blocking receives - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - if(proc_to_comm.ne.myrow) then - p2ptag = krecvid(icontxt,proc_to_comm,myrow) - rcv_pt = brvidx(proc_to_comm) - call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& - & mpi_double_precision,prcid(proc_to_comm),& - & p2ptag, icomm,rvhd(proc_to_comm),iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - ! Then I post all the blocking sends - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) - - if(proc_to_comm .ne. myrow) then - p2ptag=ksendid(icontxt,proc_to_comm,myrow) - call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& - & mpi_double_precision,prcid(proc_to_comm),& - & p2ptag,icomm,iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + + call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do - if(.false.) then - do i=1, totxch + if(.false.) then + do i=1, totxch call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if if (ixrec .ne. mpi_undefined) then - ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index - point_to_proc = ptp(ixrec) - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) else - int_err(1) = ixrec - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if - end do - - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) + end do + + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) nerv = d_idx(point_to_proc+psb_n_elem_recv_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + if(proc_to_comm .eq. myrow) then - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) end if point_to_proc = point_to_proc+nerv+nesd+3 proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - else + end do + else point_to_proc = 1 proc_to_comm = d_idx(point_to_proc+psb_proc_id_) do while (proc_to_comm .ne. -1) nerv = d_idx(point_to_proc+psb_n_elem_recv_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + if(proc_to_comm.ne.myrow) then call mpi_wait(rvhd(proc_to_comm),p2pstat,iret) if(iret.ne.mpi_success) then @@ -383,57 +383,57 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) end if - + point_to_proc = point_to_proc+nerv+nesd+3 proc_to_comm = d_idx(point_to_proc+psb_proc_id_) end do - end if + end if else if (swap_send) then - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) - call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) - - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do else if (swap_recv) then - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if(proc_to_comm.ne.myrow) then - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) - else - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) - end if + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do end if @@ -443,8 +443,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.act_abort) then - call psb_error(icontxt) - return + call psb_error(icontxt) + return end if return end subroutine psi_dswapdatam @@ -481,41 +481,41 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) character(len=20) :: name, ch_err interface psi_gth - subroutine psi_dgthm(n,k,idx,x,y) - integer :: n, k, idx(:) - real(kind(1.d0)) :: x(:,:), y(:) - end subroutine psi_dgthm - subroutine psi_dgthv(n,idx,x,y) - integer :: n, idx(:) - real(kind(1.d0)) :: x(:), y(:) - end subroutine psi_dgthv - subroutine psi_igthm(n,k,idx,x,y) - integer :: n, k, idx(:) - integer :: x(:,:), y(:) - end subroutine psi_igthm - subroutine psi_igthv(n,idx,x,y) - integer :: n, idx(:) - integer :: x(:), y(:) - end subroutine psi_igthv + subroutine psi_dgthm(n,k,idx,x,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: x(:,:), y(:) + end subroutine psi_dgthm + subroutine psi_dgthv(n,idx,x,y) + integer :: n, idx(:) + real(kind(1.d0)) :: x(:), y(:) + end subroutine psi_dgthv + subroutine psi_igthm(n,k,idx,x,y) + integer :: n, k, idx(:) + integer :: x(:,:), y(:) + end subroutine psi_igthm + subroutine psi_igthv(n,idx,x,y) + integer :: n, idx(:) + integer :: x(:), y(:) + end subroutine psi_igthv end interface interface psi_sct - subroutine psi_dsctm(n,k,idx,x,beta,y) - integer :: n, k, idx(:) - real(kind(1.d0)) :: beta, x(:), y(:,:) - end subroutine psi_dsctm - subroutine psi_dsctv(n,idx,x,beta,y) - integer :: n, idx(:) - real(kind(1.d0)) :: beta, x(:), y(:) - end subroutine psi_dsctv - subroutine psi_isctm(n,k,idx,x,beta,y) - integer :: n, k, idx(:) - integer :: beta, x(:), y(:,:) - end subroutine psi_isctm - subroutine psi_isctv(n,idx,x,beta,y) - integer :: n, idx(:) - integer :: beta, x(:), y(:) - end subroutine psi_isctv + subroutine psi_dsctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:,:) + end subroutine psi_dsctm + subroutine psi_dsctv(n,idx,x,beta,y) + integer :: n, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:) + end subroutine psi_dsctv + subroutine psi_isctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + integer :: beta, x(:), y(:,:) + end subroutine psi_isctm + subroutine psi_isctv(n,idx,x,beta,y) + integer :: n, idx(:) + integer :: beta, x(:), y(:) + end subroutine psi_isctv end interface info = 0 @@ -525,14 +525,14 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) icontxt=desc_a%matrix_data(psb_ctxt_) call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) if (nprow == -1) then - info = 2010 - call psb_errpush(info,name) - goto 9999 + info = 2010 + call psb_errpush(info,name) + goto 9999 else if (npcol /= 1) then - info = 2030 - int_err(1) = npcol - call psb_errpush(info,name) - goto 9999 + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 endif call blacs_get(icontxt,10,icomm) @@ -542,8 +542,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& & ptp(0:nprow-1), stat=info) if(info.ne.0) then - call psb_errpush(4000,name) - goto 9999 + call psb_errpush(4000,name) + goto 9999 end if swap_mpi = iand(flag,psb_swap_mpi_).ne.0 @@ -552,15 +552,15 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) swap_recv = iand(flag,psb_swap_recv_).ne.0 if(present(data)) then - if(data.eq.psb_comm_halo_) then - d_idx => desc_a%halo_index - else if(data.eq.psb_comm_ovr_) then - d_idx => desc_a%ovrlap_index - else - d_idx => desc_a%halo_index - end if + if(data.eq.psb_comm_halo_) then + d_idx => desc_a%halo_index + else if(data.eq.psb_comm_ovr_) then + d_idx => desc_a%ovrlap_index + else + d_idx => desc_a%halo_index + end if else - d_idx => desc_a%halo_index + d_idx => desc_a%halo_index end if idxs = 1 @@ -573,285 +573,321 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) ! prepare info for communications proc_to_comm = d_idx(point_to_proc+psb_proc_id_) do while (proc_to_comm.ne.-1) - if(proc_to_comm .ne. myrow) totxch = totxch+1 - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) - ptp(proc_to_comm) = point_to_proc + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc - brvidx(proc_to_comm) = idxr - rvsz(proc_to_comm) = n*nerv - idxr = idxr+rvsz(proc_to_comm) + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) - bsdidx(proc_to_comm) = idxs - sdsz(proc_to_comm) = n*nesd - idxs = idxs+sdsz(proc_to_comm) + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) end do if((idxr+idxs).lt.size(work)) then - sndbuf => work(1:idxs) - rcvbuf => work(idxs+1:idxs+idxr) + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) else - allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) - if(info.ne.0) then - call psb_errpush(4000,name) - goto 9999 - end if + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if end if ! Case SWAP_MPI if(swap_mpi) then - - ! gather elements into sendbuffer for swapping - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_double_precision,rcvbuf,rvsz,& + & brvidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself idx_pt = point_to_proc+nerv+psb_elem_send_ snd_pt = bsdidx(proc_to_comm) call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& & y,sndbuf(snd_pt:snd_pt+nesd-1)) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - ! swap elements using mpi_alltoallv - call mpi_alltoallv(sndbuf,sdsz,bsdidx,& - & mpi_double_precision,rcvbuf,rvsz,& - & brvidx,mpi_double_precision,icomm,iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if - - ! scatter elements from receivebuffer after swapping - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then idx_pt = point_to_proc+psb_elem_recv_ rcv_pt = brvidx(proc_to_comm) call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do - else if (swap_sync) then + else if (swap_send .and. swap_recv) then + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if (proc_to_comm .lt. myrow) then - ! First I send - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd-1)) - call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) - ! Then I receive - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - else if (proc_to_comm .gt. myrow) then - ! First I receive - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - ! Then I send - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd-1)) - call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) - else if (proc_to_comm .eq. myrow) then - ! I send to myself - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd-1)) + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + if(.false.) then + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if(proc_to_comm.ne.myrow) then - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) else - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 end if + end do - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - - else if (swap_send .and. swap_recv) then - ! First I post all the non blocking receives - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) nerv = d_idx(point_to_proc+psb_n_elem_recv_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if(proc_to_comm.ne.myrow) then - p2ptag = krecvid(icontxt,proc_to_comm,myrow) - rcv_pt = brvidx(proc_to_comm) - call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& - & mpi_double_precision,prcid(proc_to_comm),& - & p2ptag, icomm,rvhd(proc_to_comm),iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) end if point_to_proc = point_to_proc+nerv+nesd+3 proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + end do + + else - ! Then I post all the blocking sends - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) nerv = d_idx(point_to_proc+psb_n_elem_recv_) nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd-1)) - - if(proc_to_comm .ne. myrow) then - p2ptag=ksendid(icontxt,proc_to_comm,myrow) - call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& - & mpi_double_precision,prcid(proc_to_comm),& - & p2ptag,icomm,iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if - end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do - do i=1, totxch - call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) - if(iret.ne.mpi_success) then - int_err(1) = iret - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if - if (ixrec .ne. mpi_undefined) then - ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index - point_to_proc = ptp(ixrec) - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + if(proc_to_comm.ne.myrow) then + call mpi_wait(rvhd(proc_to_comm),p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) else - int_err(1) = ixrec - info=400 - call psb_errpush(info,name,i_err=int_err) - goto 9999 - end if - end do - - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - - if(proc_to_comm .eq. myrow) then - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if point_to_proc = point_to_proc+nerv+nesd+3 proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + end do + + end if else if (swap_send) then - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - idx_pt = point_to_proc+nerv+psb_elem_send_ - snd_pt = bsdidx(proc_to_comm) - call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& - & y,sndbuf(snd_pt:snd_pt+nesd-1)) - call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) - - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,d_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do else if (swap_recv) then - point_to_proc = 1 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - do while (proc_to_comm .ne. -1) - nerv = d_idx(point_to_proc+psb_n_elem_recv_) - nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) + point_to_proc = 1 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = d_idx(point_to_proc+psb_n_elem_recv_) + nesd = d_idx(point_to_proc+nerv+psb_n_elem_send_) - if(proc_to_comm.ne.myrow) then - rcv_pt = brvidx(proc_to_comm) - call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) - idx_pt = point_to_proc+psb_elem_recv_ - rcv_pt = brvidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) - else - idx_pt = point_to_proc+psb_elem_recv_ - snd_pt = bsdidx(proc_to_comm) - call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& - & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) - end if + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,d_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if - point_to_proc = point_to_proc+nerv+nesd+3 - proc_to_comm = d_idx(point_to_proc+psb_proc_id_) - end do + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = d_idx(point_to_proc+psb_proc_id_) + end do end if @@ -861,8 +897,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.act_abort) then - call psb_error(icontxt) - return + call psb_error(icontxt) + return end if return end subroutine psi_dswapdatav