diff --git a/base/internals/psi_desc_impl.f90 b/base/internals/psi_desc_impl.f90 index deaf2a67..1d465878 100644 --- a/base/internals/psi_desc_impl.f90 +++ b/base/internals/psi_desc_impl.f90 @@ -110,6 +110,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info) if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info) if (info == 0) call psb_realloc(nrcv,xch_idx%rmt_rcv_idx,info) + if (info == 0) call psb_realloc(nsnd,xch_idx%rmt_snd_idx,info) if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') @@ -137,11 +138,11 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) if (vidx_in(ip) == -1) exit xch_idx%prcs_xch(ixch) = vidx_in(ip) nerv = vidx_in(ip+psb_n_elem_recv_) -!!$ write(*,*) 'Check on receive option ',ip,nerv,xch_idx%loc_rcv_bnd(ixch) + write(*,*) 'Check on receive option ',ip,nerv,xch_idx%loc_rcv_bnd(ixch) xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & & vidx_in(ip+psb_n_elem_recv_+1:ip+psb_n_elem_recv_+nerv) nesd = vidx_in(ip+nerv+psb_n_elem_send_) -!!$ write(*,*) 'Check on send option ',ip,nesd,xch_idx%loc_snd_bnd(ixch) + write(*,*) 'Check on send option ',ip,nesd,xch_idx%loc_snd_bnd(ixch) xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = & & vidx_in(ip+nerv+psb_n_elem_send_+1:ip+nerv+psb_n_elem_send_+nesd) xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv @@ -151,12 +152,18 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) sync images(img) buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) -!!$ if (img == 2) write(*,*) this_image(),'Send idx to 2:',& -!!$ & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) + if (img == 2) write(*,*) this_image(),nesd,'Send idx to 2:',& + & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) + if (img == 2) write(*,*) this_image(),nerv,'Recv idx fr 2:',& + & xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) event post(snd_done(me)[img]) event wait(snd_done(img)) + if (this_image() == 2) write(*,*) img,nesd,'Send idx on 2:',& + & buf_rmt_snd_idx(1:nesd),':',xch_idx%loc_rcv_bnd(ixch),xch_idx%loc_rcv_bnd(ixch)+nerv-1 + if (this_image() == 2) write(*,*) img,nerv,'Recv idx on 2:',& + & buf_rmt_rcv_idx(1:nerv),':',xch_idx%loc_snd_bnd(ixch),xch_idx%loc_snd_bnd(ixch)+nesd-1 xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & & buf_rmt_rcv_idx(1:nerv) xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = & @@ -170,6 +177,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1 if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) + if (allocated(buf_rmt_rcv_idx)) deallocate(buf_rmt_rcv_idx) + if (allocated(buf_rmt_snd_idx)) deallocate(buf_rmt_snd_idx) if (allocated(snd_done)) deallocate(snd_done) !sync all call psb_erractionrestore(err_act) diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index 027d0970..5518f58f 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -160,32 +160,43 @@ program pdgenspmv ! FIXME: cache flush needed here nxch = size(xchg%prcs_xch) !!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1 + temp = 0.d0 + xvc(nrl+1:) = 0.d0 do i=1,iwarm - temp = 0.d0 - xvc(nrl+1:) = 0.d0 ! Sync images - !sync images(xchg%prcs_xch+1) - do ip=1,nxch - img = xchg%prcs_xch(ip) + 1 - event post(ready[img]) - end do - event wait(ready, until_count=nxch) - do ip = 1, nxch - img = xchg%prcs_xch(ip) + 1 - p1 = xchg%loc_rcv_bnd(ip) - p2 = xchg%loc_rcv_bnd(ip+1)-1 + sync images(xchg%prcs_xch+1) + if (.false.) then + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 !!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',& !!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2) !!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] - - temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + + temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] !!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) !!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',& !!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2)) - end do + end do !!$ if (this_image()==2) write(0,*) this_image(),' :x: ',& !!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl) - call a%csmv(done,xvc,dzero,bvc,info) + call a%csmv(done,xvc,dzero,bvc,info) + else + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + write(0,*) this_image(),'Boundaries ',& + & p1,p2,' :',xchg%loc_snd_idx(p1:p2),':',xchg%rmt_snd_idx(p1:p2) + temp(p1:p2) = xvc(xchg%loc_snd_idx(p1:p2)) + xvc(xchg%rmt_snd_idx(p1:p2))[img] = temp(p1:p2) + end do + sync images(xchg%prcs_xch+1) + call a%csmv(done,xvc,dzero,bvc,info) + + end if end do call psb_barrier(ictxt) tt1 = psb_wtime() @@ -197,16 +208,27 @@ program pdgenspmv !!$ event post(ready[img]) !!$ end do !!$ event wait(ready, until_count=nxch) - - do ip = 1, nxch - img = xchg%prcs_xch(ip) + 1 - sync images (img) - p1 = xchg%loc_rcv_bnd(ip) - p2 = xchg%loc_rcv_bnd(ip+1)-1 - !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] - temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] - xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) - end do + if (.true.) then + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + sync images (img) + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) + end do + else + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] + temp(p1:p2) = xvc(xchg%loc_snd_idx(p1:p2)) + xvc(xchg%rmt_snd_idx(p1:p2))[img] = temp(p1:p2) + end do + sync images(xchg%prcs_xch+1) + end if call a%csmv(done,xvc,dzero,bvc,info) end do call psb_barrier(ictxt) diff --git a/test/kernel/runs/spmv.inp b/test/kernel/runs/spmv.inp index e0656746..18488d5b 100644 --- a/test/kernel/runs/spmv.inp +++ b/test/kernel/runs/spmv.inp @@ -1,3 +1,3 @@ CSR -200 -40 \ No newline at end of file +004 +01