added psb_xchg_v psb_xchg_vect psb_xchg_m psb_xchgtran_v psb_xchg_tran_vect psb_xchgtran_m for single/double real/complex

psblas3-caf
Ambra Abdullahi 10 years ago
parent 0f574ffec0
commit 6b2eb8aae6

@ -152,6 +152,167 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
return return
end subroutine psi_dswapdatam end subroutine psi_dswapdatam
subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_dswap_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:,:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_dswap_xchg_m
subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & subroutine psi_dswapidxm(iictxt,iicomm,flag,n,beta,y,idx, &
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
@ -643,6 +804,165 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_dswapdatav end subroutine psi_dswapdatav
subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_dswap_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_dswap_xchg_v
! !
@ -1045,7 +1365,6 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
! locals ! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx class(psb_i_base_vect_type), pointer :: d_vidx
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -1075,17 +1394,12 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
end if end if
call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999 goto 9999
end if end if
if (.false.) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
end if
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1191,7 +1505,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
goto 9999 goto 9999
end if end if
end if end if
if (.true.) then if (.false.) then
!sync all !sync all
nxch = size(xchg%prcs_xch) nxch = size(xchg%prcs_xch)
myself = this_image() myself = this_image()
@ -1279,7 +1593,6 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
end subroutine psi_dswap_xchg_vect end subroutine psi_dswap_xchg_vect
! !
! !
! Subroutine: psi_dswap_vidx_vect ! Subroutine: psi_dswap_vidx_vect
@ -1883,3 +2196,4 @@ subroutine psi_dswap_vidx_multivect(iictxt,iicomm,flag,beta,y,idx, &
return return
end subroutine psi_dswap_vidx_multivect end subroutine psi_dswap_vidx_multivect

@ -112,6 +112,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_
integer(psb_ipk_), pointer :: d_idx(:) integer(psb_ipk_), pointer :: d_idx(:)
integer(psb_ipk_) :: ierr(5) integer(psb_ipk_) :: ierr(5)
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -141,12 +142,17 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999 goto 9999
end if end if
if (.false.) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
endif
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -157,6 +163,167 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
return return
end subroutine psi_dswaptranm end subroutine psi_dswaptranm
subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_dswaptran_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:,:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_m'
info=psb_success_
name='psi_swaptran_datam'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_dswaptran_xchg_m
subroutine psi_dtranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info) subroutine psi_dtranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_dtranidxm use psi_mod, psb_protect_name => psi_dtranidxm
@ -601,6 +768,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
! locals ! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_
integer(psb_ipk_), pointer :: d_idx(:) integer(psb_ipk_), pointer :: d_idx(:)
class(psb_xch_idx_type), pointer :: d_xchg
integer(psb_ipk_) :: ierr(5) integer(psb_ipk_) :: ierr(5)
character(len=20) :: name character(len=20) :: name
@ -630,12 +798,16 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
end if end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999 goto 9999
end if end if
if (.false.) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
end if
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -646,6 +818,166 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_dswaptranv end subroutine psi_dswaptranv
subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_dswaptran_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_dswaptran_xchg_v
! !
! !
@ -1067,10 +1399,12 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx class(psb_i_base_vect_type), pointer :: d_vidx
integer(psb_ipk_) :: ierr(5) integer(psb_ipk_) :: ierr(5)
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
name='psi_swap_tranv' print*,'calling psi_tran_xch_vect'
name='psi_tran_xch_vect'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
ictxt = desc_a%get_context() ictxt = desc_a%get_context()
@ -1095,12 +1429,17 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)
end if end if
call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999 goto 9999
end if end if
if (.false.) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
end if
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1111,7 +1450,187 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_dswaptran_vect end subroutine psi_dswaptran_vect
subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_dswaptran_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_tran_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_dswaptran_xchg_vect
! !
! !

@ -152,6 +152,166 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswapdatam end subroutine psi_sswapdatam
subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswap_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_s_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:,:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswap_xchg_m
subroutine psi_sswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & subroutine psi_sswapidxm(iictxt,iicomm,flag,n,beta,y,idx, &
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
@ -643,6 +803,165 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswapdatav end subroutine psi_sswapdatav
subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswap_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_s_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswap_xchg_v
! !
@ -1090,6 +1409,188 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswapdata_vect end subroutine psi_sswapdata_vect
subroutine psi_sswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswap_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_s_base_vect_type) :: y
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_snd_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswap_xchg_vect
! !
! !

@ -157,6 +157,166 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswaptranm end subroutine psi_sswaptranm
subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswaptran_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_s_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:,:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swaptran_datam'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswaptran_xchg_m
subroutine psi_stranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info) subroutine psi_stranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_stranidxm use psi_mod, psb_protect_name => psi_stranidxm
@ -646,6 +806,166 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswaptranv end subroutine psi_sswaptranv
subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswaptran_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_s_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswaptran_xchg_v
! !
! !
@ -1111,6 +1431,187 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data)
return return
end subroutine psi_sswaptran_vect end subroutine psi_sswaptran_vect
subroutine psi_sswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_sswaptran_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_s_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_s_base_vect_type) :: y
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_tran_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_sswaptran_xchg_vect
! !

@ -74,6 +74,14 @@ module psi_d_mod
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_dswapdata_multivect end subroutine psi_dswapdata_multivect
subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:,:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_dswap_xchg_m
subroutine psi_dswapidxm(ictxt,icomm,flag,n,beta,y,idx,& subroutine psi_dswapidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -83,6 +91,14 @@ module psi_d_mod
real(psb_dpk_),target :: work(:) real(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_dswapidxm end subroutine psi_dswapidxm
subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_dswap_xchg_v
subroutine psi_dswapidxv(ictxt,icomm,flag,beta,y,idx,& subroutine psi_dswapidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -164,6 +180,14 @@ module psi_d_mod
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_dswaptran_multivect end subroutine psi_dswaptran_multivect
subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:,:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_dswaptran_xchg_m
subroutine psi_dtranidxm(ictxt,icomm,flag,n,beta,y,idx,& subroutine psi_dtranidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -173,6 +197,14 @@ module psi_d_mod
real(psb_dpk_),target :: work(:) real(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_dtranidxm end subroutine psi_dtranidxm
subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_) :: y(:)
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_dswaptran_xchg_v
subroutine psi_dtranidxv(ictxt,icomm,flag,beta,y,idx,& subroutine psi_dtranidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -182,6 +214,14 @@ module psi_d_mod
real(psb_dpk_),target :: work(:) real(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_dtranidxv end subroutine psi_dtranidxv
subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_d_base_vect_type) :: y
real(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_dswaptran_xchg_vect
subroutine psi_dtran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,& subroutine psi_dtran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import

@ -30,7 +30,7 @@
!!$ !!$
!!$ !!$
module psi_s_mod module psi_s_mod
use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_spk_, psb_i_base_vect_type use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_spk_, psb_i_base_vect_type, psb_xch_idx_type
use psb_s_base_vect_mod, only : psb_s_base_vect_type use psb_s_base_vect_mod, only : psb_s_base_vect_type
use psb_s_base_multivect_mod, only : psb_s_base_multivect_type use psb_s_base_multivect_mod, only : psb_s_base_multivect_type
@ -74,6 +74,14 @@ module psi_s_mod
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_sswapdata_multivect end subroutine psi_sswapdata_multivect
subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:,:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswap_xchg_m
subroutine psi_sswapidxm(ictxt,icomm,flag,n,beta,y,idx,& subroutine psi_sswapidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -83,6 +91,14 @@ module psi_s_mod
real(psb_spk_),target :: work(:) real(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_sswapidxm end subroutine psi_sswapidxm
subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswap_xchg_v
subroutine psi_sswapidxv(ictxt,icomm,flag,beta,y,idx,& subroutine psi_sswapidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -92,6 +108,14 @@ module psi_s_mod
real(psb_spk_),target :: work(:) real(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_sswapidxv end subroutine psi_sswapidxv
subroutine psi_sswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_s_base_vect_type) :: y
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswap_xchg_vect
subroutine psi_sswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,& subroutine psi_sswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -156,6 +180,14 @@ module psi_s_mod
type(psb_desc_type), target :: desc_a type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data integer(psb_ipk_), optional :: data
end subroutine psi_sswaptran_multivect end subroutine psi_sswaptran_multivect
subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:,:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswaptran_xchg_m
subroutine psi_stranidxm(ictxt,icomm,flag,n,beta,y,idx,& subroutine psi_stranidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -165,6 +197,14 @@ module psi_s_mod
real(psb_spk_),target :: work(:) real(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_stranidxm end subroutine psi_stranidxm
subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
real(psb_spk_) :: y(:)
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswaptran_xchg_v
subroutine psi_stranidxv(ictxt,icomm,flag,beta,y,idx,& subroutine psi_stranidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import
@ -174,6 +214,14 @@ module psi_s_mod
real(psb_spk_),target :: work(:) real(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_stranidxv end subroutine psi_stranidxv
subroutine psi_sswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_s_base_vect_type) :: y
real(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_sswaptran_xchg_vect
subroutine psi_stran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,& subroutine psi_stran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info) & totxch,totsnd,totrcv,work,info)
import import

@ -298,7 +298,6 @@ subroutine psi_dgthzv(n,idx,x,y)
! Locals ! Locals
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
do i=1,n do i=1,n
y(i)=x(idx(i)) y(i)=x(idx(i))
end do end do

Loading…
Cancel
Save