diff --git a/base/internals/psi_dswapdata.F90 b/base/internals/psi_dswapdata.F90 index a7ba43c5..c3585303 100644 --- a/base/internals/psi_dswapdata.F90 +++ b/base/internals/psi_dswapdata.F90 @@ -1073,9 +1073,15 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,& - & desc_a%sendtypes,desc_a%recvtypes,work,info) + + if ((data_ == psb_comm_halo_) .and. (beta == dzero)) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,& + & desc_a%sendtypes(:,psb_rdpkidx_),desc_a%recvtypes(:,psb_rdpkidx_),& + & work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,& + & work,info) + end if if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -1092,7 +1098,7 @@ end subroutine psi_dswapdata_vect subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv,& - & sendtypes,recvtypes,work,info) + & work,info) use psi_mod, psb_protect_name => psi_dswapidx_vect use psb_error_mod @@ -1113,7 +1119,6 @@ subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv, real(psb_dpk_) :: beta real(psb_dpk_), target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv - integer, allocatable,optional, intent(in) :: sendtypes(:),recvtypes(:) ! locals integer(psb_mpik_) :: ictxt, icomm, np, me,& @@ -1133,9 +1138,6 @@ subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv, volatile :: sndbuf, rcvbuf #endif character(len=20) :: name - !integer, dimension(totxch) :: sendtypes,recvtypes - !integer, allocatable :: sendtypes(:),recvtypes(:) - !integer, allocatable :: blens(:), new_idx(:) info=psb_success_ name='psi_swap_datav' @@ -1274,7 +1276,7 @@ subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv, ! end if - if (beta/=0 .and. do_send) then + if (do_send) then ! Pack send buffers pnti = 1 @@ -1380,19 +1382,15 @@ subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv, if ((nesd>0).and.(proc_to_comm /= me)) then - if(beta==0) then - call send_routine(y%v,sendtypes(i),prcid(i),p2ptag,icomm, iret) - else - if (usersend) then - call mpi_rsend(sndbuf(snd_pt),nesd,& - & psb_mpi_r_dpk_,prcid(i),& - & p2ptag,icomm,iret) - else - call mpi_send(sndbuf(snd_pt),nesd,& - & psb_mpi_r_dpk_,prcid(i),& - & p2ptag,icomm,iret) - end if - end if + if (usersend) then + call mpi_rsend(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + else + call mpi_send(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + end if if(iret /= mpi_success) then ierr(1) = iret @@ -1519,6 +1517,791 @@ subroutine psi_dswapidx_vect(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv, end if return +end subroutine psi_dswapidx_vect + +subroutine psi_dswapidx_vect_mptx(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv,& + & sendtypes,recvtypes,work,info) + + use psi_mod, psb_protect_name => psi_dswapidx_vect_mptx + use psb_error_mod + use psb_desc_mod + use psb_penv_mod + use psb_d_base_vect_mod +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_vect_type) :: y + real(psb_dpk_) :: beta + real(psb_dpk_), target :: work(:) + integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd, totrcv + integer(psb_mpik_), intent(in) :: sendtypes(:),recvtypes(:) + + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret + integer(psb_mpik_), allocatable, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, rvhd, sdhd + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,& + & snd_pt, rcv_pt, pnti, n,j + integer(psb_ipk_) :: ierr(5) + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + logical, parameter :: usersend=.false. + + real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#ifdef HAVE_VOLATILE + volatile :: sndbuf, rcvbuf +#endif + character(len=20) :: name + integer, allocatable :: blens(:), new_idx(:) + + info=psb_success_ + name='psi_swap_datav' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (beta==0 .and. do_send .and. do_recv) then + + + totrcv_ = totrcv * n + totsnd_ = totsnd * n + + if (swap_mpi) then + allocate(sdsz(0:np-1), rvsz(0:np-1), bsdidx(0:np-1),& + & brvidx(0:np-1), rvhd(0:np-1), sdhd(0:np-1), prcid(0:np-1),& + & stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + rvhd(:) = mpi_request_null + sdsz(:) = 0 + rvsz(:) = 0 + + ! prepare info for communications + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + call psb_get_rank(prcid(proc_to_comm),ictxt,proc_to_comm) + + brvidx(proc_to_comm) = rcv_pt + rvsz(proc_to_comm) = nerv + + bsdidx(proc_to_comm) = snd_pt + sdsz(proc_to_comm) = nesd + + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + + end do + + else + allocate(rvhd(totxch),prcid(totxch),stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + end if + + + totrcv_ = max(totrcv_,1) + totsnd_ = max(totsnd_,1) + if((totrcv_+totsnd_) < size(work)) then + sndbuf => work(1:totsnd_) + rcvbuf => work(totsnd_+1:totsnd_+totrcv_) + albf=.false. + else + allocate(sndbuf(totsnd_),rcvbuf(totrcv_), stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + albf=.true. + end if + + + !We've to set the derivate datatypes + !Send/Gather + ! pnti = 1 + ! snd_pt = 1 + ! if(.not.allocated(sendtypes)) then + ! allocate(sendtypes(totxch), stat=info) + ! do i=1, totxch + ! nerv = idx(pnti+psb_n_elem_recv_) + ! nesd = idx(pnti+nerv+psb_n_elem_send_) + ! idx_pt = 1+pnti+nerv+psb_n_elem_send_ + ! allocate(blens(nesd),stat=info) + ! do j=1,nesd + ! blens(j) = 1 + ! end do + + ! call MPI_TYPE_INDEXED(nesd,blens,(idx(idx_pt:idx_pt+nesd-1)-1),& + ! & mpi_double_precision,sendtypes(i),info) + ! call MPI_TYPE_COMMIT(sendtypes(i),info) + ! deallocate(blens,stat=info) + ! snd_pt = snd_pt + nesd + ! pnti = pnti + nerv + nesd + 3 + ! end do + ! end if + + !Recv/Scatter + ! pnti = 1 + ! snd_pt = 1 + ! rcv_pt = 1 + !if(.not.allocated(recvtypes)) then + ! allocate(recvtypes(totxch), stat=info) + ! do i=1, totxch + ! proc_to_comm = idx(pnti+psb_proc_id_) + ! nerv = idx(pnti+psb_n_elem_recv_) + ! nesd = idx(pnti+nerv+psb_n_elem_send_) + ! idx_pt = 1+pnti+psb_n_elem_recv_ + ! allocate(blens(nerv),stat=info) + ! do j=1, nerv + ! blens(j) = 1 + ! end do + ! + ! call MPI_TYPE_INDEXED(nerv,blens,(idx(idx_pt:idx_pt+nerv-1)-1),& + ! & mpi_double_precision,recvtypes(i),info) + ! call MPI_TYPE_COMMIT(recvtypes(i),info) + ! deallocate(blens,stat=info) + + ! rcv_pt = rcv_pt + nerv + ! snd_pt = snd_pt + nesd + ! pnti = pnti + nerv + nesd + 3 + ! end do + ! end if + + + if (beta/=0 .and. do_send) then + + ! Pack send buffers + pnti = 1 + snd_pt = 1 + do i=1, totxch + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + idx_pt = 1+pnti+nerv+psb_n_elem_send_ + call y%gth(nesd,idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1)) + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + end if + + ! Case SWAP_MPI + if (swap_mpi) then !swap_mpi==false + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & psb_mpi_r_dpk_,rcvbuf,rvsz,& + & brvidx,psb_mpi_r_dpk_,icomm,iret) + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + else if (swap_sync) then !swap_sync==false + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + if (proc_to_comm < me) then + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + else if (proc_to_comm > me) then + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + else if (proc_to_comm == me) then + if (nesd /= nerv) then + write(psb_err_unit,*) & + & 'Fatal error in swapdata: mismatch on self send',& + & nerv,nesd + end if + rcvbuf(rcv_pt:rcv_pt+nerv-1) = sndbuf(snd_pt:snd_pt+nesd-1) + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + else if (swap_send .and. swap_recv) then + + !write(*,*) 'Sono dentro swap_send .and. swap_recv' + + ! First I post all the non blocking receives + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + call psb_get_rank(prcid(i),ictxt,proc_to_comm) + if ((nerv>0).and.(proc_to_comm /= me)) then + p2ptag = psb_double_swap_tag + call mpi_irecv(rcvbuf(rcv_pt),nerv,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag, icomm,rvhd(i),iret) + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + ! Then I post all the blocking sends + if (usersend) call mpi_barrier(icomm,iret) + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + p2ptag = psb_double_swap_tag + + if ((nesd>0).and.(proc_to_comm /= me)) then + + if(beta==0) then + call send_routine(y%v,sendtypes(i),prcid(i),p2ptag,icomm, iret) + else + if (usersend) then + call mpi_rsend(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + else + call mpi_send(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + end if + end if + + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + pnti = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + p2ptag = psb_double_swap_tag + + if ((proc_to_comm /= me).and.(nerv>0)) then + call mpi_wait(rvhd(i),p2pstat,iret) + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else if (proc_to_comm == me) then + if (nesd /= nerv) then + write(psb_err_unit,*) & + & 'Fatal error in swapdata: mismatch on self send',& + & nerv,nesd + end if + rcvbuf(rcv_pt:rcv_pt+nerv-1) = sndbuf(snd_pt:snd_pt+nesd-1) + end if + pnti = pnti + nerv + nesd + 3 + end do + + + else if (swap_send) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + else if (swap_recv) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + end if + + if (do_recv) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + !call mpi_type_free(sendtypes(i),info) + !call mpi_type_free(recvtypes(i),info) + if(beta/=0) then + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + idx_pt = 1+pnti+psb_n_elem_recv_ + call y%sct(nerv,idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end if + end do + + end if + + if (swap_mpi) then + deallocate(sdsz,rvsz,bsdidx,brvidx,rvhd,prcid,sdhd,& + & stat=info) + else + deallocate(rvhd,prcid,stat=info) + end if + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + if(albf) deallocate(sndbuf,rcvbuf,stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + + else + + + totrcv_ = totrcv * n + totsnd_ = totsnd * n + + if (swap_mpi) then + allocate(sdsz(0:np-1), rvsz(0:np-1), bsdidx(0:np-1),& + & brvidx(0:np-1), rvhd(0:np-1), sdhd(0:np-1), prcid(0:np-1),& + & stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + rvhd(:) = mpi_request_null + sdsz(:) = 0 + rvsz(:) = 0 + + ! prepare info for communications + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + call psb_get_rank(prcid(proc_to_comm),ictxt,proc_to_comm) + + brvidx(proc_to_comm) = rcv_pt + rvsz(proc_to_comm) = nerv + + bsdidx(proc_to_comm) = snd_pt + sdsz(proc_to_comm) = nesd + + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + + end do + + else + allocate(rvhd(totxch),prcid(totxch),stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + end if + + + totrcv_ = max(totrcv_,1) + totsnd_ = max(totsnd_,1) + if((totrcv_+totsnd_) < size(work)) then + sndbuf => work(1:totsnd_) + rcvbuf => work(totsnd_+1:totsnd_+totrcv_) + albf=.false. + else + allocate(sndbuf(totsnd_),rcvbuf(totrcv_), stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + albf=.true. + end if + + + !We've to set the derivate datatypes + !Send/Gather + ! pnti = 1 + ! snd_pt = 1 + ! if(.not.allocated(sendtypes)) then + ! allocate(sendtypes(totxch), stat=info) + ! do i=1, totxch + ! nerv = idx(pnti+psb_n_elem_recv_) + ! nesd = idx(pnti+nerv+psb_n_elem_send_) + ! idx_pt = 1+pnti+nerv+psb_n_elem_send_ + ! allocate(blens(nesd),stat=info) + ! do j=1,nesd + ! blens(j) = 1 + ! end do + + ! call MPI_TYPE_INDEXED(nesd,blens,(idx(idx_pt:idx_pt+nesd-1)-1),& + ! & mpi_double_precision,sendtypes(i),info) + ! call MPI_TYPE_COMMIT(sendtypes(i),info) + ! deallocate(blens,stat=info) + ! snd_pt = snd_pt + nesd + ! pnti = pnti + nerv + nesd + 3 + ! end do + ! end if + + !Recv/Scatter + ! pnti = 1 + ! snd_pt = 1 + ! rcv_pt = 1 + !if(.not.allocated(recvtypes)) then + ! allocate(recvtypes(totxch), stat=info) + ! do i=1, totxch + ! proc_to_comm = idx(pnti+psb_proc_id_) + ! nerv = idx(pnti+psb_n_elem_recv_) + ! nesd = idx(pnti+nerv+psb_n_elem_send_) + ! idx_pt = 1+pnti+psb_n_elem_recv_ + ! allocate(blens(nerv),stat=info) + ! do j=1, nerv + ! blens(j) = 1 + ! end do + ! + ! call MPI_TYPE_INDEXED(nerv,blens,(idx(idx_pt:idx_pt+nerv-1)-1),& + ! & mpi_double_precision,recvtypes(i),info) + ! call MPI_TYPE_COMMIT(recvtypes(i),info) + ! deallocate(blens,stat=info) + + ! rcv_pt = rcv_pt + nerv + ! snd_pt = snd_pt + nesd + ! pnti = pnti + nerv + nesd + 3 + ! end do + ! end if + + + if (beta/=0 .and. do_send) then + + ! Pack send buffers + pnti = 1 + snd_pt = 1 + do i=1, totxch + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + idx_pt = 1+pnti+nerv+psb_n_elem_send_ + call y%gth(nesd,idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1)) + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + end if + + ! Case SWAP_MPI + if (swap_mpi) then !swap_mpi==false + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & psb_mpi_r_dpk_,rcvbuf,rvsz,& + & brvidx,psb_mpi_r_dpk_,icomm,iret) + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + else if (swap_sync) then !swap_sync==false + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + if (proc_to_comm < me) then + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + else if (proc_to_comm > me) then + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + else if (proc_to_comm == me) then + if (nesd /= nerv) then + write(psb_err_unit,*) & + & 'Fatal error in swapdata: mismatch on self send',& + & nerv,nesd + end if + rcvbuf(rcv_pt:rcv_pt+nerv-1) = sndbuf(snd_pt:snd_pt+nesd-1) + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + else if (swap_send .and. swap_recv) then + + !write(*,*) 'Sono dentro swap_send .and. swap_recv' + + ! First I post all the non blocking receives + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + call psb_get_rank(prcid(i),ictxt,proc_to_comm) + if ((nerv>0).and.(proc_to_comm /= me)) then + p2ptag = psb_double_swap_tag + call mpi_irecv(rcvbuf(rcv_pt),nerv,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag, icomm,rvhd(i),iret) + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + ! Then I post all the blocking sends + if (usersend) call mpi_barrier(icomm,iret) + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + p2ptag = psb_double_swap_tag + + if ((nesd>0).and.(proc_to_comm /= me)) then + + if(beta==0) then + call send_routine(y%v,sendtypes(i),prcid(i),p2ptag,icomm, iret) + else + if (usersend) then + call mpi_rsend(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + else + call mpi_send(sndbuf(snd_pt),nesd,& + & psb_mpi_r_dpk_,prcid(i),& + & p2ptag,icomm,iret) + end if + end if + + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + + pnti = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + + p2ptag = psb_double_swap_tag + + if ((proc_to_comm /= me).and.(nerv>0)) then + call mpi_wait(rvhd(i),p2pstat,iret) + if(iret /= mpi_success) then + ierr(1) = iret + info=psb_err_mpi_error_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else if (proc_to_comm == me) then + if (nesd /= nerv) then + write(psb_err_unit,*) & + & 'Fatal error in swapdata: mismatch on self send',& + & nerv,nesd + end if + rcvbuf(rcv_pt:rcv_pt+nerv-1) = sndbuf(snd_pt:snd_pt+nesd-1) + end if + pnti = pnti + nerv + nesd + 3 + end do + + + else if (swap_send) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + if (nesd>0) call psb_snd(ictxt,& + & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + else if (swap_recv) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + if (nerv>0) call psb_rcv(ictxt,& + & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end do + + end if + + if (do_recv) then + + pnti = 1 + snd_pt = 1 + rcv_pt = 1 + do i=1, totxch + !call mpi_type_free(sendtypes(i),info) + !call mpi_type_free(recvtypes(i),info) + if(beta/=0) then + proc_to_comm = idx(pnti+psb_proc_id_) + nerv = idx(pnti+psb_n_elem_recv_) + nesd = idx(pnti+nerv+psb_n_elem_send_) + idx_pt = 1+pnti+psb_n_elem_recv_ + call y%sct(nerv,idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta) + rcv_pt = rcv_pt + nerv + snd_pt = snd_pt + nesd + pnti = pnti + nerv + nesd + 3 + end if + end do + + end if + + if (swap_mpi) then + deallocate(sdsz,rvsz,bsdidx,brvidx,rvhd,prcid,sdhd,& + & stat=info) + else + deallocate(rvhd,prcid,stat=info) + end if + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + if(albf) deallocate(sndbuf,rcvbuf,stat=info) + if(info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act == psb_act_abort_) then + call psb_error(ictxt) + return + end if + return + contains subroutine receive_routine(v,recvtype,procSender,tag,communicator, rvhd,info) @@ -1580,5 +2363,5 @@ contains end subroutine send_routine -end subroutine psi_dswapidx_vect +end subroutine psi_dswapidx_vect_mptx diff --git a/base/internals/sndrcv.c b/base/internals/sndrcv.c index 2ddeffac..00cdc998 100644 --- a/base/internals/sndrcv.c +++ b/base/internals/sndrcv.c @@ -4,20 +4,20 @@ int receiveRoutine(double * y, int recvtype, int procSender, int tag, int comm, int *handle){ - MPI_Comm co = MPI_Comm_f2c(comm); - MPI_Datatype dt = MPI_Type_f2c(recvtype); - MPI_Request req;// = MPI_Request_f2c(*handle); - MPI_Irecv(y, 1, dt, procSender,tag, co, &req); - *handle = MPI_Request_c2f(req); - return 0; - + MPI_Comm co = MPI_Comm_f2c(comm); + MPI_Datatype dt = MPI_Type_f2c(recvtype); + MPI_Request req;// = MPI_Request_f2c(*handle); + MPI_Irecv(y, 1, dt, procSender,tag, co, &req); + *handle = MPI_Request_c2f(req); + return 0; + } int sendRoutine(double * y, int sendtype, int procToSend,int tag, int comm){ - - MPI_Comm co = MPI_Comm_f2c(comm); - MPI_Datatype dt = MPI_Type_f2c(sendtype); - MPI_Send(y, 1, dt, procToSend,tag,co); - return 0; + + MPI_Comm co = MPI_Comm_f2c(comm); + MPI_Datatype dt = MPI_Type_f2c(sendtype); + MPI_Send(y, 1, dt, procToSend,tag,co); + return 0; } diff --git a/base/modules/psb_desc_mod.f90 b/base/modules/psb_desc_mod.f90 index ab00b137..8b4e6e7c 100644 --- a/base/modules/psb_desc_mod.f90 +++ b/base/modules/psb_desc_mod.f90 @@ -245,6 +245,14 @@ module psb_desc_mod module procedure psb_cdfree end interface psb_free + interface + subroutine psb_cd_destroy(desc) + implicit none + !....parameters... + class(psb_desc_type), intent(inout) :: desc + end subroutine psb_cd_destroy + end interface + private :: nullify_desc integer(psb_ipk_), private, save :: cd_large_threshold=psb_default_large_threshold @@ -660,59 +668,6 @@ contains end subroutine psb_cdfree - ! - ! Subroutine: psb_cdfree - ! Frees a descriptor data structure. - ! - ! Arguments: - ! desc_a - type(psb_desc_type). The communication descriptor to be freed. - subroutine psb_cd_destroy(desc) - !...free descriptor structure... - use psb_const_mod - use psb_error_mod - use psb_penv_mod - implicit none - !....parameters... - class(psb_desc_type), intent(inout) :: desc - !...locals.... - integer(psb_ipk_) :: info - - - if (allocated(desc%halo_index)) & - & deallocate(desc%halo_index,stat=info) - - if (allocated(desc%bnd_elem)) & - & deallocate(desc%bnd_elem,stat=info) - - if (allocated(desc%ovrlap_index)) & - & deallocate(desc%ovrlap_index,stat=info) - - if (allocated(desc%ovrlap_elem)) & - & deallocate(desc%ovrlap_elem,stat=info) - if (allocated(desc%ovr_mst_idx)) & - & deallocate(desc%ovr_mst_idx,stat=info) - - if (allocated(desc%lprm)) & - & deallocate(desc%lprm,stat=info) - if (allocated(desc%idx_space)) & - & deallocate(desc%idx_space,stat=info) - - if (allocated(desc%sendtypes)) & - & deallocate(desc%sendtypes,stat=info) - - if (allocated(desc%recvtypes)) & - & deallocate(desc%recvtypes,stat=info) - - if (allocated(desc%indxmap)) then - call desc%indxmap%free() - deallocate(desc%indxmap, stat=info) - end if - - call desc%nullify() - - return - - end subroutine psb_cd_destroy ! ! Subroutine: psb_cdtransfer ! Transfers data and allocation from in to out; behaves like MOVE_ALLOC, i.e. diff --git a/base/modules/psi_d_mod.f90 b/base/modules/psi_d_mod.f90 index 52b2fc03..4bbcea20 100644 --- a/base/modules/psi_d_mod.f90 +++ b/base/modules/psi_d_mod.f90 @@ -80,7 +80,7 @@ module psi_d_mod integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv end subroutine psi_dswapidxv subroutine psi_dswapidx_vect(ictxt,icomm,flag,beta,y,idx,& - & totxch,totsnd,totrcv,sendtypes,recvtypes,work,info) + & totxch,totsnd,totrcv,work,info) import :: psb_desc_type, psb_ipk_, psb_dpk_, psb_d_base_vect_type integer(psb_ipk_), intent(in) :: ictxt,icomm,flag integer(psb_ipk_), intent(out) :: info @@ -88,8 +88,18 @@ module psi_d_mod real(psb_dpk_) :: beta real(psb_dpk_),target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv - integer, allocatable,optional, intent(in) :: sendtypes(:),recvtypes(:) end subroutine psi_dswapidx_vect + subroutine psi_dswapidx_vect_mptx(ictxt,icomm,flag,beta,y,idx,& + & totxch,totsnd,totrcv,sendtypes,recvtypes,work,info) + import :: psb_desc_type, psb_ipk_, psb_dpk_, psb_d_base_vect_type, psb_mpik_ + integer(psb_ipk_), intent(in) :: ictxt,icomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_d_base_vect_type) :: y + real(psb_dpk_) :: beta + real(psb_dpk_),target :: work(:) + integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv + integer(psb_mpik_), intent(in) :: sendtypes(:),recvtypes(:) + end subroutine psi_dswapidx_vect_mptx end interface diff --git a/base/tools/Makefile b/base/tools/Makefile index 8ac704f7..362be952 100644 --- a/base/tools/Makefile +++ b/base/tools/Makefile @@ -5,7 +5,7 @@ FOBJS = psb_sallc.o psb_sasb.o \ psb_dallc.o psb_dasb.o \ psb_dfree.o psb_dins.o \ psb_cdall.o psb_cdals.o psb_cdalv.o psb_cd_inloc.o psb_cdins.o psb_cdprt.o \ - psb_cdren.o psb_cdrep.o psb_get_overlap.o psb_cd_lstext.o\ + psb_cdren.o psb_cdrep.o psb_get_overlap.o psb_cd_lstext.o psb_cd_destroy.o\ psb_cdcpy.o psb_cd_reinit.o psb_cd_switch_ovl_indxmap.o\ psb_dspalloc.o psb_dspasb.o \ psb_dspfree.o psb_dspins.o psb_dsprn.o \ diff --git a/base/tools/psb_cd_destroy.F90 b/base/tools/psb_cd_destroy.F90 new file mode 100644 index 00000000..ae7e78ff --- /dev/null +++ b/base/tools/psb_cd_destroy.F90 @@ -0,0 +1,78 @@ + ! + ! Subroutine: psb_cdfree + ! Frees a descriptor data structure. + ! + ! Arguments: + ! desc_a - type(psb_desc_type). The communication descriptor to be freed. +subroutine psb_cd_destroy(desc) + !...free descriptor structure... + use psb_const_mod + use psb_error_mod + use psb_penv_mod + use psb_desc_mod, psb_protect_name => psb_cd_destroy +#ifdef MPI_MOD + use mpi +#endif + Implicit None +#ifdef MPI_H + include 'mpif.h' +#endif + !....parameters... + class(psb_desc_type), intent(inout) :: desc + !...locals.... + integer(psb_ipk_) :: info, i, j + + + if (allocated(desc%halo_index)) & + & deallocate(desc%halo_index,stat=info) + + if (allocated(desc%bnd_elem)) & + & deallocate(desc%bnd_elem,stat=info) + + if (allocated(desc%ovrlap_index)) & + & deallocate(desc%ovrlap_index,stat=info) + + if (allocated(desc%ovrlap_elem)) & + & deallocate(desc%ovrlap_elem,stat=info) + if (allocated(desc%ovr_mst_idx)) & + & deallocate(desc%ovr_mst_idx,stat=info) + + if (allocated(desc%lprm)) & + & deallocate(desc%lprm,stat=info) + if (allocated(desc%idx_space)) & + & deallocate(desc%idx_space,stat=info) + + if (allocated(desc%sendtypes)) then + do j=1, size(desc%sendtypes,2) + do i=1, size(desc%sendtypes,1) + if (desc%sendtypes(i,j) == mpi_data_null) then + call mpi_type_free(desc%sendtypes(i,j),info) + end if + end do + end do + deallocate(desc%sendtypes,stat=info) + end if + + + if (allocated(desc%recvtypes)) then + do j=1, size(desc%recvtypes,2) + do i=1, size(desc%recvtypes,1) + if (desc%recvtypes(i,j) == mpi_data_null) then + call mpi_type_free(desc%recvtypes(i,j),info) + end if + end do + end do + deallocate(desc%recvtypes,stat=info) + end if + + + if (allocated(desc%indxmap)) then + call desc%indxmap%free() + deallocate(desc%indxmap, stat=info) + end if + + call desc%nullify() + + return + +end subroutine psb_cd_destroy diff --git a/base/tools/psb_icdasb.F90 b/base/tools/psb_icdasb.F90 index 1056612c..a5cfbc16 100644 --- a/base/tools/psb_icdasb.F90 +++ b/base/tools/psb_icdasb.F90 @@ -164,52 +164,18 @@ subroutine psb_icdasb(desc,info,ext_hv) !datatypes allocation data_ = psb_comm_halo_ call desc%get_list(data_,idx,totxch,idxr,idxs,info) - !Send/Gather - pnti = 1 - snd_pt = 1 - rcv_pt = 1 - allocate(desc%sendtypes(totxch), stat=info) - do i=1, totxch - nerv = idx(pnti+psb_n_elem_recv_) - nesd = idx(pnti+nerv+psb_n_elem_send_) - idx_pt = 1+pnti+nerv+psb_n_elem_send_ - allocate(blens(nesd),stat=info) - do j=1,nesd - blens(j) = 1 - end do - - call MPI_TYPE_INDEXED(nesd,blens,(idx(idx_pt:idx_pt+nesd-1)-1),& - & mpi_double_precision,desc%sendtypes(i),info) - call MPI_TYPE_COMMIT(desc%sendtypes(i),info) - deallocate(blens,stat=info) - snd_pt = snd_pt + nesd - pnti = pnti + nerv + nesd + 3 - end do - pnti = 1 - snd_pt = 1 - rcv_pt = 1 - !Recv/Scatter - allocate(desc%recvtypes(totxch), stat=info) - do i=1, totxch - proc_to_comm = idx(pnti+psb_proc_id_) - nerv = idx(pnti+psb_n_elem_recv_) - nesd = idx(pnti+nerv+psb_n_elem_send_) - idx_pt = 1+pnti+psb_n_elem_recv_ - allocate(blens(nerv),stat=info) - do j=1, nerv - blens(j) = 1 - end do - - call MPI_TYPE_INDEXED(nerv,blens,(idx(idx_pt:idx_pt+nerv-1)-1),& - & mpi_double_precision,desc%recvtypes(i),info) - call MPI_TYPE_COMMIT(desc%recvtypes(i),info) - deallocate(blens,stat=info) - - rcv_pt = rcv_pt + nerv - snd_pt = snd_pt + nesd - pnti = pnti + nerv + nesd + 3 - end do - + allocate(desc%sendtypes(totxch,psb_nkidx_),& + & desc%recvtypes(totxch,psb_nkidx_), stat=info) + if (info /= 0) then + info =psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + ! Init here, they will be filled in upon request + desc%sendtypes(:,:) = mpi_datatype_null + desc%recvtypes(:,:) = mpi_datatype_null + if (debug_level >= psb_debug_ext_) & & write(debug_unit,*) me,' ',trim(name),': Done'