Fixed to avoid calls when number of elements to be sent/received is 0;

this may happen with unsymmetric patterns.
psblas3-type-indexed
Salvatore Filippone 18 years ago
parent 2f0cfece0e
commit 2968c06cfb

@ -165,7 +165,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
end if end if
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -224,11 +225,15 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
@ -249,13 +254,13 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),n*nerv,& call mpi_irecv(rcvbuf(rcv_pt),n*nerv,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -275,7 +280,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),n*nesd,& call mpi_rsend(sndbuf(snd_pt),n*nesd,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
@ -292,7 +297,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -309,7 +314,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -332,7 +337,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -350,7 +356,8 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -581,7 +588,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -638,11 +646,15 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -661,11 +673,13 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),nerv,& call mpi_irecv(rcvbuf(rcv_pt),nerv,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -685,6 +699,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),nesd,& call mpi_rsend(sndbuf(snd_pt),nesd,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
@ -701,6 +716,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -715,7 +731,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -737,7 +753,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -752,7 +769,8 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3

@ -161,6 +161,8 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -223,11 +225,15 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -246,11 +252,13 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),n*nesd,& call mpi_irecv(sndbuf(snd_pt),n*nesd,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -267,8 +275,9 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),n*nerv,& call mpi_rsend(rcvbuf(rcv_pt),n*nerv,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
@ -285,6 +294,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -300,7 +310,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -322,7 +332,8 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -338,7 +349,8 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -564,6 +576,8 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -625,11 +639,15 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -648,12 +666,13 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),nesd,& call mpi_irecv(sndbuf(snd_pt),nesd,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -670,8 +689,9 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),nerv,& call mpi_rsend(rcvbuf(rcv_pt),nerv,&
& mpi_double_precision,prcid(i),& & mpi_double_precision,prcid(i),&
@ -688,6 +708,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -702,7 +723,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -724,7 +745,8 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -740,7 +762,8 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3

@ -165,7 +165,8 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
end if end if
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -224,11 +225,15 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
@ -249,13 +254,13 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),n*nerv,& call mpi_irecv(rcvbuf(rcv_pt),n*nerv,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -275,7 +280,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),n*nesd,& call mpi_rsend(sndbuf(snd_pt),n*nesd,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
@ -292,7 +297,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -309,7 +314,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -332,7 +337,8 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -350,7 +356,8 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -581,7 +588,8 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -638,11 +646,15 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -661,11 +673,13 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),nerv,& call mpi_irecv(rcvbuf(rcv_pt),nerv,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -685,6 +699,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),nesd,& call mpi_rsend(sndbuf(snd_pt),nesd,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
@ -701,6 +716,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -715,7 +731,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -737,7 +753,8 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -752,7 +769,8 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3

@ -161,6 +161,8 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -223,11 +225,15 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -246,11 +252,13 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),n*nesd,& call mpi_irecv(sndbuf(snd_pt),n*nesd,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -267,8 +275,9 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),n*nerv,& call mpi_rsend(rcvbuf(rcv_pt),n*nerv,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
@ -285,6 +294,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -300,7 +310,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -322,7 +332,8 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -338,7 +349,8 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -346,9 +358,8 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
if (do_recv) then if (do_recv) then
pnti = 1 pnti = 1
snd_pt = 1 snd_pt = 1
rcv_pt = 1 rcv_pt = 1
@ -564,6 +575,8 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -625,11 +638,15 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -648,12 +665,13 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),nesd,& call mpi_irecv(sndbuf(snd_pt),nesd,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -670,8 +688,9 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),nerv,& call mpi_rsend(rcvbuf(rcv_pt),nerv,&
& mpi_integer,prcid(i),& & mpi_integer,prcid(i),&
@ -688,6 +707,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -702,7 +722,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -724,7 +744,8 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -740,7 +761,8 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3

@ -225,11 +225,15 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
@ -250,13 +254,13 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv > 0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),n*nerv,& call mpi_irecv(rcvbuf(rcv_pt),n*nerv,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
endif
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -276,7 +280,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),n*nesd,& call mpi_rsend(sndbuf(snd_pt),n*nesd,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
@ -293,7 +297,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -310,7 +314,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -333,7 +337,8 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -351,7 +356,8 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -640,11 +646,15 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -663,11 +673,13 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nerv>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(rcvbuf(rcv_pt),nerv,& call mpi_irecv(rcvbuf(rcv_pt),nerv,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
& p2ptag, icomm,rvhd(i),iret) & p2ptag, icomm,rvhd(i),iret)
endif
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -687,6 +699,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag=ksendid(ictxt,proc_to_comm,me) p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nesd>0) then
if (usersend) then if (usersend) then
call mpi_rsend(sndbuf(snd_pt),nesd,& call mpi_rsend(sndbuf(snd_pt),nesd,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
@ -703,6 +716,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -717,7 +731,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nerv>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -739,7 +753,8 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_snd(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -754,7 +769,8 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_rcv(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3

@ -161,6 +161,8 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -223,11 +225,15 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
@ -246,11 +252,16 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),n*nesd,& call mpi_irecv(sndbuf(snd_pt),n*nesd,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3
end do end do
@ -264,8 +275,9 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),n*nerv,& call mpi_rsend(rcvbuf(rcv_pt),n*nerv,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
@ -282,6 +294,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -297,7 +310,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -309,7 +322,9 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
end do end do
else if (swap_send) then else if (swap_send) then
pnti = 1 pnti = 1
snd_pt = 1 snd_pt = 1
rcv_pt = 1 rcv_pt = 1
@ -317,10 +332,12 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+n*nerv-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
end do end do
else if (swap_recv) then else if (swap_recv) then
@ -332,7 +349,8 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+n*nesd-1), proc_to_comm)
rcv_pt = rcv_pt + n*nerv rcv_pt = rcv_pt + n*nerv
snd_pt = snd_pt + n*nesd snd_pt = snd_pt + n*nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -340,7 +358,6 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
if (do_recv) then if (do_recv) then
pnti = 1 pnti = 1
@ -558,6 +575,8 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
idxr = max(idxr,1)
idxs = max(idxs,1)
if((idxr+idxs) < size(work)) then if((idxr+idxs) < size(work)) then
sndbuf => work(1:idxs) sndbuf => work(1:idxs)
rcvbuf => work(idxs+1:idxs+idxr) rcvbuf => work(idxs+1:idxs+idxr)
@ -619,11 +638,15 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (proc_to_comm < me) then if (proc_to_comm < me) then
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) & rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
else if (proc_to_comm > me) then else if (proc_to_comm > me) then
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) & sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
end if end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
@ -642,12 +665,13 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
if (nesd>0) then
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
call psb_get_rank(prcid(i),ictxt,proc_to_comm) call psb_get_rank(prcid(i),ictxt,proc_to_comm)
call mpi_irecv(sndbuf(snd_pt),nesd,& call mpi_irecv(sndbuf(snd_pt),nesd,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
& p2ptag,icomm,rvhd(i),iret) & p2ptag,icomm,rvhd(i),iret)
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -664,8 +688,9 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (nerv>0) then
p2ptag=ksendid(ictxt,proc_to_comm,me)
if (usersend) then if (usersend) then
call mpi_rsend(rcvbuf(rcv_pt),nerv,& call mpi_rsend(rcvbuf(rcv_pt),nerv,&
& mpi_double_complex,prcid(i),& & mpi_double_complex,prcid(i),&
@ -682,6 +707,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(info,name,i_err=int_err) call psb_errpush(info,name,i_err=int_err)
goto 9999 goto 9999
end if end if
end if
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -696,7 +722,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
p2ptag = krecvid(ictxt,proc_to_comm,me) p2ptag = krecvid(ictxt,proc_to_comm,me)
if (proc_to_comm /= me) then if ((proc_to_comm /= me).and.(nesd>0)) then
call mpi_wait(rvhd(i),p2pstat,iret) call mpi_wait(rvhd(i),p2pstat,iret)
if(iret /= mpi_success) then if(iret /= mpi_success) then
int_err(1) = iret int_err(1) = iret
@ -718,7 +744,8 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_snd(ictxt,rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm) if (nerv>0) call psb_snd(ictxt,&
& rcvbuf(rcv_pt:rcv_pt+nerv-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -734,7 +761,8 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
proc_to_comm = d_idx(pnti+psb_proc_id_) proc_to_comm = d_idx(pnti+psb_proc_id_)
nerv = d_idx(pnti+psb_n_elem_recv_) nerv = d_idx(pnti+psb_n_elem_recv_)
nesd = d_idx(pnti+nerv+psb_n_elem_send_) nesd = d_idx(pnti+nerv+psb_n_elem_send_)
call psb_rcv(ictxt,sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm) if (nesd>0) call psb_rcv(ictxt,&
& sndbuf(snd_pt:snd_pt+nesd-1), proc_to_comm)
rcv_pt = rcv_pt + nerv rcv_pt = rcv_pt + nerv
snd_pt = snd_pt + nesd snd_pt = snd_pt + nesd
pnti = pnti + nerv + nesd + 3 pnti = pnti + nerv + nesd + 3
@ -770,6 +798,10 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
else else
deallocate(rvhd,prcid,stat=info) deallocate(rvhd,prcid,stat=info)
end if end if
if(info /= 0) then
call psb_errpush(4000,name)
goto 9999
end if
if(albf) deallocate(sndbuf,rcvbuf,stat=info) if(albf) deallocate(sndbuf,rcvbuf,stat=info)
if(info /= 0) then if(info /= 0) then
call psb_errpush(4000,name) call psb_errpush(4000,name)

Loading…
Cancel
Save