added psb_xchg_v psb_xchg_vect psb_xchg_m psb_xchgtran_v psb_xchg_tran_vect psb_xchgtran_m for single/double real/complex

psblas3-caf
Ambra Abdullahi 8 years ago
parent 6b2eb8aae6
commit b66e4a3492

@ -152,6 +152,167 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswapdatam
subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswap_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_c_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:,:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_cswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswap_xchg_m
subroutine psi_cswapidxm(iictxt,iicomm,flag,n,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
@ -643,6 +804,165 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswapdatav
subroutine psi_cswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswap_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_c_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswap_xchg_v
!
@ -1090,6 +1410,188 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswapdata_vect
subroutine psi_cswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswap_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_snd_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswap_xchg_vect
!
!

@ -157,6 +157,166 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswaptranm
subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswaptran_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_c_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:,:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swaptran_datam'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswaptran_xchg_m
subroutine psi_ctranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_ctranidxm
@ -646,6 +806,166 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswaptranv
subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswaptran_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_c_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswaptran_xchg_v
!
!
@ -1111,6 +1431,187 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_cswaptran_vect
subroutine psi_cswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_cswaptran_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_c_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_tran_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_cswaptran_xchg_vect
!

@ -152,6 +152,167 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswapdatam
subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswap_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_z_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:,:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_zswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswap_xchg_m
subroutine psi_zswapidxm(iictxt,iicomm,flag,n,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
@ -643,6 +804,165 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswapdatav
subroutine psi_zswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswap_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_z_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswap_xchg_v
!
@ -1090,6 +1410,188 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswapdata_vect
subroutine psi_zswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswap_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_d_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_snd_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswap_xchg_vect
!
!

@ -157,6 +157,166 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswaptranm
subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswaptran_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_z_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:,:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swaptran_datam'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswaptran_xchg_m
subroutine psi_ztranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_ztranidxm
@ -646,6 +806,166 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswaptranv
subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswaptran_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_z_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.true.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),&
& y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),&
& buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswaptran_xchg_v
!
!
@ -1111,6 +1431,187 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_zswaptran_vect
subroutine psi_zswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_zswaptran_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_z_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
complex(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_tran_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_zswaptran_xchg_vect
!

@ -30,7 +30,7 @@
!!$
!!$
module psi_c_mod
use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_spk_, psb_i_base_vect_type
use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_spk_, psb_i_base_vect_type, psb_xch_idx_type
use psb_c_base_vect_mod, only : psb_c_base_vect_type
use psb_c_base_multivect_mod, only : psb_c_base_multivect_type
@ -74,6 +74,14 @@ module psi_c_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_cswapdata_multivect
subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:,:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswap_xchg_m
subroutine psi_cswapidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -83,6 +91,14 @@ module psi_c_mod
complex(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_cswapidxm
subroutine psi_cswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswap_xchg_v
subroutine psi_cswapidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -92,6 +108,14 @@ module psi_c_mod
complex(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_cswapidxv
subroutine psi_cswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswap_xchg_vect
subroutine psi_cswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -156,6 +180,14 @@ module psi_c_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_cswaptran_multivect
subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:,:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswaptran_xchg_m
subroutine psi_ctranidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -165,6 +197,14 @@ module psi_c_mod
complex(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_ctranidxm
subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_) :: y(:)
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswaptran_xchg_v
subroutine psi_ctranidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -174,6 +214,14 @@ module psi_c_mod
complex(psb_spk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_ctranidxv
subroutine psi_cswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_c_base_vect_type) :: y
complex(psb_spk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_cswaptran_xchg_vect
subroutine psi_ctran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import

@ -30,7 +30,7 @@
!!$
!!$
module psi_z_mod
use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_dpk_, psb_i_base_vect_type
use psb_desc_mod, only : psb_desc_type, psb_ipk_, psb_dpk_, psb_i_base_vect_type, psb_xch_idx_type
use psb_z_base_vect_mod, only : psb_z_base_vect_type
use psb_z_base_multivect_mod, only : psb_z_base_multivect_type
@ -74,6 +74,14 @@ module psi_z_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_zswapdata_multivect
subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:,:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswap_xchg_m
subroutine psi_zswapidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -83,6 +91,14 @@ module psi_z_mod
complex(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_zswapidxm
subroutine psi_zswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswap_xchg_v
subroutine psi_zswapidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -92,6 +108,14 @@ module psi_z_mod
complex(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_zswapidxv
subroutine psi_zswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswap_xchg_vect
subroutine psi_zswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -156,6 +180,14 @@ module psi_z_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_zswaptran_multivect
subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:,:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswaptran_xchg_m
subroutine psi_ztranidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -165,6 +197,14 @@ module psi_z_mod
complex(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_ztranidxm
subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_) :: y(:)
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswaptran_xchg_v
subroutine psi_ztranidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -174,6 +214,14 @@ module psi_z_mod
complex(psb_dpk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_ztranidxv
subroutine psi_zswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_z_base_vect_type) :: y
complex(psb_dpk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_zswaptran_xchg_vect
subroutine psi_ztran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import

Loading…
Cancel
Save