This now works when IDIM is even, not when it is odd. Needs to be sorted..

psblas-caf-xp
sfilippone 9 years ago
parent e684fe27fb
commit 333ebbbb2f

@ -110,6 +110,7 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info) if (info == 0) call psb_realloc(nsnd,xch_idx%loc_snd_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info) if (info == 0) call psb_realloc(nrcv,xch_idx%loc_rcv_idx,info)
if (info == 0) call psb_realloc(nrcv,xch_idx%rmt_rcv_idx,info) if (info == 0) call psb_realloc(nrcv,xch_idx%rmt_rcv_idx,info)
if (info == 0) call psb_realloc(nsnd,xch_idx%rmt_snd_idx,info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
@ -137,11 +138,11 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
if (vidx_in(ip) == -1) exit if (vidx_in(ip) == -1) exit
xch_idx%prcs_xch(ixch) = vidx_in(ip) xch_idx%prcs_xch(ixch) = vidx_in(ip)
nerv = vidx_in(ip+psb_n_elem_recv_) nerv = vidx_in(ip+psb_n_elem_recv_)
!!$ write(*,*) 'Check on receive option ',ip,nerv,xch_idx%loc_rcv_bnd(ixch) write(*,*) 'Check on receive option ',ip,nerv,xch_idx%loc_rcv_bnd(ixch)
xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& vidx_in(ip+psb_n_elem_recv_+1:ip+psb_n_elem_recv_+nerv) & vidx_in(ip+psb_n_elem_recv_+1:ip+psb_n_elem_recv_+nerv)
nesd = vidx_in(ip+nerv+psb_n_elem_send_) nesd = vidx_in(ip+nerv+psb_n_elem_send_)
!!$ write(*,*) 'Check on send option ',ip,nesd,xch_idx%loc_snd_bnd(ixch) write(*,*) 'Check on send option ',ip,nesd,xch_idx%loc_snd_bnd(ixch)
xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = &
& vidx_in(ip+nerv+psb_n_elem_send_+1:ip+nerv+psb_n_elem_send_+nesd) & vidx_in(ip+nerv+psb_n_elem_send_+1:ip+nerv+psb_n_elem_send_+nesd)
xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv
@ -151,12 +152,18 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
sync images(img) sync images(img)
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
!!$ if (img == 2) write(*,*) this_image(),'Send idx to 2:',& if (img == 2) write(*,*) this_image(),nesd,'Send idx to 2:',&
!!$ & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) & xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
if (img == 2) write(*,*) this_image(),nerv,'Recv idx fr 2:',&
& xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1)
buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) buf_rmt_rcv_idx(1:nesd)[img] = xch_idx%loc_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1)
buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) buf_rmt_snd_idx(1:nerv)[img] = xch_idx%loc_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1)
event post(snd_done(me)[img]) event post(snd_done(me)[img])
event wait(snd_done(img)) event wait(snd_done(img))
if (this_image() == 2) write(*,*) img,nesd,'Send idx on 2:',&
& buf_rmt_snd_idx(1:nesd),':',xch_idx%loc_rcv_bnd(ixch),xch_idx%loc_rcv_bnd(ixch)+nerv-1
if (this_image() == 2) write(*,*) img,nerv,'Recv idx on 2:',&
& buf_rmt_rcv_idx(1:nerv),':',xch_idx%loc_snd_bnd(ixch),xch_idx%loc_snd_bnd(ixch)+nesd-1
xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = & xch_idx%rmt_rcv_idx(xch_idx%loc_rcv_bnd(ixch):xch_idx%loc_rcv_bnd(ixch)+nerv-1) = &
& buf_rmt_rcv_idx(1:nerv) & buf_rmt_rcv_idx(1:nerv)
xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = & xch_idx%rmt_snd_idx(xch_idx%loc_snd_bnd(ixch):xch_idx%loc_snd_bnd(ixch)+nesd-1) = &
@ -170,6 +177,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1 xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(buf_rmt_rcv_idx)) deallocate(buf_rmt_rcv_idx)
if (allocated(buf_rmt_snd_idx)) deallocate(buf_rmt_snd_idx)
if (allocated(snd_done)) deallocate(snd_done) if (allocated(snd_done)) deallocate(snd_done)
!sync all !sync all
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -160,32 +160,43 @@ program pdgenspmv
! FIXME: cache flush needed here ! FIXME: cache flush needed here
nxch = size(xchg%prcs_xch) nxch = size(xchg%prcs_xch)
!!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1 !!$ if (this_image()==2) write(0,*) this_image(),nxch,nrl,ncl,' Exchanging with ',xchg%prcs_xch+1
temp = 0.d0
xvc(nrl+1:) = 0.d0
do i=1,iwarm do i=1,iwarm
temp = 0.d0
xvc(nrl+1:) = 0.d0
! Sync images ! Sync images
!sync images(xchg%prcs_xch+1) sync images(xchg%prcs_xch+1)
do ip=1,nxch if (.false.) then
img = xchg%prcs_xch(ip) + 1 do ip = 1, nxch
event post(ready[img]) img = xchg%prcs_xch(ip) + 1
end do p1 = xchg%loc_rcv_bnd(ip)
event wait(ready, until_count=nxch) p2 = xchg%loc_rcv_bnd(ip+1)-1
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
!!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',& !!$ if (this_image()==2) write(0,*) this_image(),'Boundaries ',&
!!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2) !!$ & p1,p2,' :',xchg%loc_rcv_idx(p1:p2),':',xchg%rmt_rcv_idx(p1:p2)
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] !!$ xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
!!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) !!$ xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',& !!$ if (this_image()==2) write(0,*) this_image(),' :x: ',ip,' : ',&
!!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2)) !!$ &xvc(xchg%loc_rcv_idx(p1:p2)),' : ',xv%v%v(xchg%loc_rcv_idx(p1:p2))
end do end do
!!$ if (this_image()==2) write(0,*) this_image(),' :x: ',& !!$ if (this_image()==2) write(0,*) this_image(),' :x: ',&
!!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl) !!$ &xvc(nrl+1:ncl),' : ',xv%v%v(nrl+1:ncl)
call a%csmv(done,xvc,dzero,bvc,info) call a%csmv(done,xvc,dzero,bvc,info)
else
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
write(0,*) this_image(),'Boundaries ',&
& p1,p2,' :',xchg%loc_snd_idx(p1:p2),':',xchg%rmt_snd_idx(p1:p2)
temp(p1:p2) = xvc(xchg%loc_snd_idx(p1:p2))
xvc(xchg%rmt_snd_idx(p1:p2))[img] = temp(p1:p2)
end do
sync images(xchg%prcs_xch+1)
call a%csmv(done,xvc,dzero,bvc,info)
end if
end do end do
call psb_barrier(ictxt) call psb_barrier(ictxt)
tt1 = psb_wtime() tt1 = psb_wtime()
@ -197,16 +208,27 @@ program pdgenspmv
!!$ event post(ready[img]) !!$ event post(ready[img])
!!$ end do !!$ end do
!!$ event wait(ready, until_count=nxch) !!$ event wait(ready, until_count=nxch)
if (.true.) then
do ip = 1, nxch do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1 img = xchg%prcs_xch(ip) + 1
sync images (img) sync images (img)
p1 = xchg%loc_rcv_bnd(ip) p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1 p2 = xchg%loc_rcv_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] !xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img] temp(p1:p2) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2) xvc(xchg%loc_rcv_idx(p1:p2)) = temp(p1:p2)
end do end do
else
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
!xvc(xchg%loc_rcv_idx(p1:p2)) = xvc(xchg%rmt_rcv_idx(p1:p2))[img]
temp(p1:p2) = xvc(xchg%loc_snd_idx(p1:p2))
xvc(xchg%rmt_snd_idx(p1:p2))[img] = temp(p1:p2)
end do
sync images(xchg%prcs_xch+1)
end if
call a%csmv(done,xvc,dzero,bvc,info) call a%csmv(done,xvc,dzero,bvc,info)
end do end do
call psb_barrier(ictxt) call psb_barrier(ictxt)

@ -1,3 +1,3 @@
CSR CSR
200 004
40 01

Loading…
Cancel
Save