Added CAF collective subroutines and matdist

psblas3-caf
Ambra Abdullahi 8 years ago
parent 50d57a1acf
commit bcc9a0f39a

@ -89,6 +89,7 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -142,7 +143,7 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data)
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -188,7 +189,6 @@ subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_cswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -755,6 +755,7 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -808,7 +809,7 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -1374,6 +1375,7 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1428,7 +1430,7 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -93,6 +93,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -147,7 +148,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -756,6 +757,7 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -809,7 +811,7 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -855,7 +857,6 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -1393,6 +1394,7 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1447,7 +1449,7 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -89,6 +89,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -142,7 +143,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data)
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -190,7 +191,6 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
info=psb_success_
name='psi_swap_xchg_m'
print*,me
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
@ -755,6 +755,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -808,7 +809,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -856,7 +857,6 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
info=psb_success_
name='psi_swap_xchg_v'
print*, name
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
@ -1375,6 +1375,7 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1429,7 +1430,7 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -1478,7 +1479,6 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
info=psb_success_
name='psi_xchg_vect'
print*,name
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
@ -1533,9 +1533,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
print*,'allocating buffer', me
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
print*,'buffer allocated', me
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then

@ -93,6 +93,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -147,7 +148,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -195,7 +196,6 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
info=psb_success_
name='psi_swaptran_datam'
print*,name
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
@ -757,6 +757,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -810,7 +811,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -856,10 +857,8 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swaptran_xchg_v'
print*,name
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
@ -1395,6 +1394,7 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1449,7 +1449,7 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -1498,7 +1498,6 @@ subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
info=psb_success_
name='psi_tran_xchg_vect'
print*,name
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm

@ -89,6 +89,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -107,6 +108,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act
integer(psb_ipk_), pointer :: d_idx(:)
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name
info=psb_success_
@ -134,14 +136,19 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
data_ = psb_comm_halo_
end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info)
endif
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -152,6 +159,174 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_iswapdatam
subroutine psi_iswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswap_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:,:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),&
& y,sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),&
& buffer(p1:p2),beta, y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswap_xchg_m
subroutine psi_iswapidxm(iictxt,iicomm,flag,n,beta,y,idx, &
& totxch,totsnd,totrcv,work,info)
@ -580,6 +755,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -598,6 +774,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act
integer(psb_ipk_), pointer :: d_idx(:)
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name
info=psb_success_
@ -626,13 +803,20 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
data_ = psb_comm_halo_
end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -644,6 +828,173 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data)
end subroutine psi_iswapdatav
subroutine psi_iswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswap_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
call psi_gth(isz,xchg%loc_snd_idx(p1:p2),&
& y,sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),&
& buffer(p1:p2),beta, y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswap_xchg_v
!
!
@ -1026,6 +1377,7 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1045,6 +1397,7 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act
class(psb_i_base_vect_type), pointer :: d_vidx
class(psb_xch_idx_type), pointer :: d_xchg
character(len=20) :: name
info=psb_success_
@ -1074,12 +1427,18 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data)
end if
call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -1090,6 +1449,187 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_iswapdata_vect
subroutine psi_iswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswap_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
me = this_image()
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size, me
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_snd_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswap_xchg_vect
!
!

@ -93,6 +93,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -111,6 +112,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_
integer(psb_ipk_), pointer :: d_idx(:)
class(psb_xch_idx_type), pointer :: d_xchg
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
@ -141,12 +143,17 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
endif
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -157,6 +164,174 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data)
return
end subroutine psi_iswaptranm
subroutine psi_iswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswaptran_xchg_m
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:,:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swaptran_datam'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),&
& y,sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),&
& buffer(p1:p2),beta, y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswaptran_xchg_m
subroutine psi_itranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_itranidxm
@ -583,6 +758,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -601,6 +777,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_
integer(psb_ipk_), pointer :: d_idx(:)
class(psb_xch_idx_type), pointer :: d_xchg
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
@ -630,12 +807,18 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
end if
call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -646,6 +829,172 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data)
return
end subroutine psi_iswaptranv
subroutine psi_iswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswaptran_xchg_v
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
if (allocated(sndbuf)) deallocate(sndbuf)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),&
& y,sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call psi_sct(isz,xchg%loc_snd_idx(p1:p2),&
& buffer(p1:p2),beta, y)
event post(clear[img])
end do
last_clear_count = nxch
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswaptran_xchg_v
!
!
@ -658,7 +1007,6 @@ end subroutine psi_iswaptranv
!
!
subroutine psi_itranidxv(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info)
use psi_mod, psb_protect_name => psi_itranidxv
@ -1047,6 +1395,7 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1066,6 +1415,7 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data)
! locals
integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_
class(psb_i_base_vect_type), pointer :: d_vidx
class(psb_xch_idx_type), pointer :: d_xchg
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
@ -1095,12 +1445,18 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data)
end if
call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info)
if (info == 0) call desc_a%get_list(data_,d_xchg,info)
if (info /= psb_success_) then
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -1112,7 +1468,187 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data)
end subroutine psi_iswaptran_vect
subroutine psi_iswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
use psi_mod, psb_protect_name => psi_iswaptran_xchg_vect
use psb_error_mod
use psb_realloc_mod
use psb_desc_mod
use psb_penv_mod
use psb_i_base_vect_mod
use iso_fortran_env
implicit none
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
! locals
integer(psb_mpik_) :: ictxt, icomm, np, me,&
& proc_to_comm, p2ptag, iret
integer(psb_ipk_) :: nesd, nerv,&
& err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,&
& snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself
integer :: count
integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:)
type(event_type), allocatable, save :: ufg(:)[:]
type(event_type), allocatable, save :: clear[:]
integer, save :: last_clear_count = 0
logical :: swap_mpi, swap_sync, swap_send, swap_recv,&
& albf,do_send,do_recv
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
info=psb_success_
name='psi_tran_xchg_vect'
call psb_erractionsave(err_act)
ictxt = iictxt
icomm = iicomm
call psb_info(ictxt,me,np)
if (np == -1) then
info=psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (np /= num_images()) then
write(*,*) 'Something is wrong MPI vs CAF ', np, num_images()
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Num_images /= np')
goto 9999
end if
n=1
swap_mpi = iand(flag,psb_swap_mpi_) /= 0
swap_sync = iand(flag,psb_swap_sync_) /= 0
swap_send = iand(flag,psb_swap_send_) /= 0
swap_recv = iand(flag,psb_swap_recv_) /= 0
do_send = swap_mpi .or. swap_sync .or. swap_send
do_recv = swap_mpi .or. swap_sync .or. swap_recv
if (.not.(do_send.and.do_recv)) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect')
goto 9999
end if
if (.not.allocated(ufg)) then
!write(*,*) 'Allocating events',np
allocate(ufg(np)[*],stat=info)
if (info == 0) allocate(clear[*],stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray events allocation')
goto 9999
end if
else
if (last_clear_count>0) &
& event wait(clear,until_count=last_clear_count)
end if
if (psb_size(buffer) < xchg%max_buffer_size) then
!
! By construction, max_buffer_size was computed with a collective.
!
if (allocated(buffer)) deallocate(buffer)
!write(*,*) 'Allocating buffer',xchg%max_buffer_size
allocate(buffer(xchg%max_buffer_size)[*],stat=info)
if (allocated(sndbuf)) deallocate(sndbuf)
if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info)
if (info /= 0) then
info = psb_err_internal_error_
call psb_errpush(info,name,a_err='Coarray buffer allocation')
goto 9999
end if
end if
if (.false.) then
!sync all
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2))
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta)
event post(clear[img])
end do
last_clear_count = nxch
else
nxch = size(xchg%prcs_xch)
myself = this_image()
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_rcv_bnd(ip)
p2 = xchg%loc_rcv_bnd(ip+1)-1
rp1 = xchg%rmt_snd_bnd(ip,1)
rp2 = xchg%rmt_snd_bnd(ip,2)
isz = p2-p1+1
!write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2
if (.false.) then
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img])
else
call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2))
buffer(rp1:rp2)[img] = sndbuf(p1:p2)
end if
end do
!
! Doing event post later should provide more opportunities for
! overlap
!
do ip= 1, nxch
img = xchg%prcs_xch(ip) + 1
event post(ufg(myself)[img])
end do
do ip = 1, nxch
img = xchg%prcs_xch(ip) + 1
event wait(ufg(img))
img = xchg%prcs_xch(ip) + 1
p1 = xchg%loc_snd_bnd(ip)
p2 = xchg%loc_snd_bnd(ip+1)-1
isz = p2-p1+1
rp1 = xchg%rmt_rcv_bnd(ip,1)
rp2 = xchg%rmt_rcv_bnd(ip,2)
!write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2
call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta)
event post(clear[img])
end do
last_clear_count = nxch
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ictxt,err_act)
return
end subroutine psi_iswaptran_xchg_vect
!
!
! Subroutine: psi_itran_vidx_vect

@ -89,6 +89,7 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -142,7 +143,7 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data)
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -188,7 +189,6 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_sswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -755,6 +755,7 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -808,7 +809,7 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -1374,6 +1375,7 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1428,7 +1430,7 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -93,6 +93,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -147,7 +148,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -756,6 +757,7 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -809,7 +811,7 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -855,7 +857,6 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -1393,6 +1394,7 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1447,7 +1449,7 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -89,6 +89,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -142,7 +143,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data)
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -188,7 +189,6 @@ subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_zswap_xchg_m'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -755,6 +755,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -808,7 +809,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -1374,6 +1375,7 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1428,7 +1430,7 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -93,6 +93,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -147,7 +148,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info)
@ -756,6 +757,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -809,7 +811,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)
@ -855,7 +857,6 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name
print*,' call psi_dswaptran_xchg_v'
info=psb_success_
name='psi_swap_datav'
call psb_erractionsave(err_act)
@ -1393,6 +1394,7 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data)
use psb_error_mod
use psb_desc_mod
use psb_penv_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -1447,7 +1449,7 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data)
call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list')
goto 9999
end if
if (.false.) then
if (.not.(if_caf2)) then
call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info)
else
call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info)

@ -37,7 +37,7 @@ lib: mpfobjs $(FOBJS) $(FOBJS2) $(COBJS) $(MPFOBJS2) $(MPFOBJS)
$(RANLIB) $(LIBDIR)/$(LIBNAME)
$(FOBJS) $(FBOJS2): $(MODDIR)/psi_mod.o
mpfobjs:
mpfobjs:
(make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)")
(make $(FOBJS2) F90="$(MPF77)" FC="$(MPF77)" FCOPT="$(FCOPT)")
clean:

@ -1,4 +1,4 @@
!!$
!ii!$
!!$ Parallel Sparse BLAS version 3.4
!!$ (C) Copyright 2006, 2010, 2015
!!$ Salvatore Filippone University of Rome Tor Vergata
@ -51,6 +51,7 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info)
use psb_error_mod
use psb_penv_mod
use psb_realloc_mod
use psb_caf_mod
use psb_indx_map_mod, psb_protect_name => psb_indx_map_fnd_owner
#ifdef MPI_MOD
use mpi
@ -168,10 +169,13 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info)
if (gettime) then
t3 = psb_wtime()
end if
call mpi_allgatherv(idx,hsz(me+1),psb_mpi_ipk_integer,&
& hproc,hsz,hidx,psb_mpi_ipk_integer,&
& icomm,minfo)
if (if_caf) then
call caf_allgatherv(idx, hsz(me+1), hproc, hsz, hidx, info)
else
call mpi_allgatherv(idx,hsz(me+1),psb_mpi_ipk_integer,&
& hproc,hsz,hidx,psb_mpi_ipk_integer,&
& icomm,minfo)
endif
if (gettime) then
tamx = psb_wtime() - t3
end if
@ -211,11 +215,15 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info)
if (gettime) then
t3 = psb_wtime()
end if
! Collect all the answers with alltoallv (need sizes)
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz, 1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
isz = sum(rvsz)
allocate(answers(isz,2),idxsrch(nv,2),stat=info)
@ -228,9 +236,15 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info)
rvidx(ip) = j
j = j + rvsz(ip)
end do
call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_ipk_integer,&
& answers(:,1),rvsz,rvidx,psb_mpi_ipk_integer,&
& icomm,minfo)
print*,'---------hproc', hproc,'sdsz',sdsz,'sdidx',sdidx,'answers',answers(:,1), 'rvsz', rvsz,'rvidx', rvidx, this_image()
if (if_caf) then
call caf_alltoallv(hproc, sdsz,sdidx, answers(:,1),rvsz,rvidx, minfo)
else
call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_ipk_integer,&
& answers(:,1),rvsz,rvidx,psb_mpi_ipk_integer,&
& icomm,minfo)
endif
if (gettime) then
tamx = psb_wtime() - t3 + tamx
end if

@ -63,20 +63,23 @@ end subroutine psi_renum_index
subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
use psi_mod, psi_protect_name => psi_cnv_v2xch
use psb_realloc_mod
use psb_caf_mod
implicit none
integer(psb_ipk_), intent(in) :: ictxt, vidx_in(:)
type(psb_xch_idx_type), intent(inout) :: xch_idx
integer(psb_ipk_), intent(out) :: info
! ....local scalars....
integer(psb_ipk_) :: np, me
integer(psb_ipk_) :: np, me, img
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch
! ...parameters
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: debug=.false.
character(len=20) :: name
integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:]
type(event_type), allocatable, save :: snd_done(:)[:]
type(event_type), save :: rcv_done[*]
name='psi_cnv_v2xch'
call psb_get_erraction(err_act)
debug_level = psb_get_debug_level()
@ -85,6 +88,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
info = psb_success_
call psb_info(ictxt,me,np)
me = this_image()
np = num_images()
if (np == -1) then
info = psb_err_context_error_
call psb_errpush(info,name)
@ -114,6 +119,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
ixch = 1
xch_idx%loc_snd_bnd(1) = 1
xch_idx%loc_rcv_bnd(1) = 1
if (if_caf) then
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*])
endif
do
if (ip > size(vidx_in)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
@ -131,17 +142,32 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info)
& vidx_in(ip+nerv+psb_n_elem_send_+1:ip+nerv+psb_n_elem_send_+nesd)
xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv
xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd
call psb_snd(ictxt,xch_idx%loc_rcv_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch))
call psb_snd(ictxt,xch_idx%loc_snd_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch))
call psb_rcv(ictxt,xch_idx%rmt_rcv_bnd(ixch,1:2),xch_idx%prcs_xch(ixch))
call psb_rcv(ictxt,xch_idx%rmt_snd_bnd(ixch,1:2),xch_idx%prcs_xch(ixch))
img = xch_idx%prcs_xch(ixch) + 1
!Here I am assuming that all the data exchange between two images takes place in one exchange
if (if_caf) then
buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1)
buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1)
event post(snd_done(me)[img])
event wait(snd_done(img))
xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2)
xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2)
else
call psb_snd(ictxt,xch_idx%loc_rcv_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch))
call psb_snd(ictxt,xch_idx%loc_snd_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch))
call psb_rcv(ictxt,xch_idx%rmt_rcv_bnd(ixch,1:2),xch_idx%prcs_xch(ixch))
call psb_rcv(ictxt,xch_idx%rmt_snd_bnd(ixch,1:2),xch_idx%prcs_xch(ixch))
endif
ip = ip+nerv+nesd+3
ixch = ixch + 1
end do
xch_idx%rmt_rcv_bnd(:,2) = xch_idx%rmt_rcv_bnd(:,2) - 1
xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1
if (if_caf) then
if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd)
if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd)
if (allocated(snd_done)) deallocate(snd_done)
sync all
endif
call psb_erractionrestore(err_act)
return

@ -98,8 +98,11 @@
! is rebuilt during the CDASB process (in the psi_ldsc_pre_halo subroutine).
!
!
subroutine psi_desc_index(desc,index_in,dep_list,&
& length_dl,nsnd,nrcv,desc_index,isglob_in,info)
use psb_caf_mod
use psb_desc_mod
use psb_realloc_mod
use psb_error_mod
@ -165,6 +168,7 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
! to be received/sent (in the final psblas descriptor).
! be careful of the inversion
!
allocate(sdsz(np),rvsz(np),bsdindx(np),brvindx(np),stat=info)
if(info /= psb_success_) then
info=psb_err_alloc_dealloc_
@ -186,7 +190,14 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
i = i + nerv + 1
end do
ihinsz=i
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo)
!call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz, rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoall')
goto 9999
@ -291,9 +302,13 @@ subroutine psi_desc_index(desc,index_in,dep_list,&
brvindx(proc+1) = idxr
idxr = idxr + rvsz(proc+1)
end do
if (if_caf) then
call caf_alltoallv(sndbuf, sdsz, bsdindx, rcvbuf, rvsz, brvindx, minfo)
else
call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,&
& rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,&
& rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoallv')
goto 9999

@ -126,6 +126,7 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
use psb_const_mod
use psb_error_mod
use psb_desc_mod
use psb_caf_mod
implicit none
#ifdef MPI_H
include 'mpif.h'
@ -272,8 +273,13 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,&
endif
itmp(1:dl_lda) = dep_list(1:dl_lda,me)
dl_mpi = dl_lda
call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,&
& dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo)
if (if_caf) then
call caf_allgather(itmp,dl_mpi, dep_list, minfo)
else
call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,&
& dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo)
endif
info = minfo
if (info == 0) deallocate(itmp,stat=info)
if (info /= psb_success_) then

@ -1,6 +1,6 @@
include ../../Make.inc
BASIC_MODS= psb_const_mod.o psb_error_mod.o psb_realloc_mod.o
BASIC_MODS= psb_const_mod.o psb_error_mod.o psb_realloc_mod.o psb_caf_mod.o
COMMINT=psi_comm_buffers_mod.o psi_penv_mod.o psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o
UTIL_MODS = aux/psb_string_mod.o desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\
desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\
@ -36,9 +36,8 @@ UTIL_MODS = aux/psb_string_mod.o desc/psb_desc_const_mod.o desc/psb_indx_map_mod
serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \
serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o
MODULES=$(BASIC_MODS) $(UTIL_MODS)
OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o
OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o
LIBDIR=..
CINCLUDES=-I.
FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG). $(FIFLAG).
@ -57,7 +56,7 @@ psb_realloc_mod.o: psb_error_mod.o
$(UTIL_MODS): $(BASIC_MODS)
psi_penv_mod.o: psi_comm_buffers_mod.o
psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o: psi_penv_mod.o
psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o: psi_penv_mod.o psb_caf_mod.o
aux/psb_string_mod.o desc/psb_desc_const_mod.o psi_comm_buffers_mod.o: psb_const_mod.o
@ -155,6 +154,10 @@ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o ps
psb_base_mod.o: $(MODULES)
psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o
$(F90) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@
psi_penv_mod.o: psi_penv_mod.F90 $(BASIC_MODS)
$(F90) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@

@ -707,7 +707,7 @@ contains
do
if (ip > size(idx)) then
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector '
write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ', this_image(),ip, size(idx)
exit
end if
if (idx(ip) == -1) exit

@ -70,6 +70,7 @@ contains
subroutine psb_ibcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -79,8 +80,8 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_ipk_), intent(inout) :: dat
integer(psb_ipk_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
#if !defined(SERIAL_MPI)
@ -90,12 +91,22 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_ipk_integer,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_ipk_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_ibcasts
subroutine psb_ibcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -105,6 +116,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_ipk_), intent(inout) :: dat(:)
integer(psb_ipk_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -114,13 +126,23 @@ contains
else
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_ibcastv
subroutine psb_ibcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -130,6 +152,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_ipk_), intent(inout) :: dat(:,:)
integer(psb_ipk_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -140,14 +163,24 @@ contains
else
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_ibcastm
subroutine psb_sbcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -157,6 +190,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_spk_), intent(inout) :: dat
real(psb_spk_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -168,13 +202,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_r_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_r_spk_,root_,ictxt,info)
endif
#endif
end subroutine psb_sbcasts
subroutine psb_sbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -184,6 +228,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_spk_), intent(inout) :: dat(:)
real(psb_spk_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -195,13 +240,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info)
endif
#endif
end subroutine psb_sbcastv
subroutine psb_sbcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -211,6 +266,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_spk_), intent(inout) :: dat(:,:)
real(psb_spk_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -222,14 +278,24 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info)
endif
#endif
end subroutine psb_sbcastm
subroutine psb_dbcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -239,6 +305,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_dpk_), intent(inout) :: dat
real(psb_dpk_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -250,13 +317,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_dbcasts
subroutine psb_dbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -266,6 +343,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_dpk_), intent(inout) :: dat(:)
real(psb_dpk_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -277,12 +355,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_dbcastv
subroutine psb_dbcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -292,6 +381,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
real(psb_dpk_), intent(inout) :: dat(:,:)
real(psb_dpk_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -303,12 +393,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_dbcastm
subroutine psb_cbcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -318,10 +419,10 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_spk_), intent(inout) :: dat
complex(psb_spk_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
#if !defined(SERIAL_MPI)
if (present(root)) then
root_ = root
@ -329,12 +430,22 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_c_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_cbcasts
subroutine psb_cbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -344,6 +455,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_spk_), intent(inout) :: dat(:)
complex(psb_spk_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -355,12 +467,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info)
endif
#endif
end subroutine psb_cbcastv
subroutine psb_cbcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -370,6 +493,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_spk_), intent(inout) :: dat(:,:)
complex(psb_spk_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -381,12 +505,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info)
endif
#endif
end subroutine psb_cbcastm
subroutine psb_zbcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -396,6 +531,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_dpk_), intent(inout) :: dat
complex(psb_dpk_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -407,12 +543,25 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_c_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
!print*,'****',this_image(), dat_buf, dat
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
!print*,'***** after',this_image(), dat
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_c_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_zbcasts
subroutine psb_zbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -422,6 +571,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_dpk_), intent(inout) :: dat(:)
complex(psb_dpk_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -433,12 +583,23 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_zbcastv
subroutine psb_zbcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -448,6 +609,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
complex(psb_dpk_), intent(inout) :: dat(:,:)
complex(psb_dpk_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -459,13 +621,24 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info)
endif
#endif
end subroutine psb_zbcastm
subroutine psb_hbcasts(ictxt,dat,root,length)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -492,13 +665,13 @@ contains
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,length_,MPI_CHARACTER,root_,ictxt,info)
#endif
end subroutine psb_hbcasts
subroutine psb_hbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -522,13 +695,13 @@ contains
size_ = size(dat)
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,length_*size_,MPI_CHARACTER,root_,ictxt,info)
#endif
end subroutine psb_hbcastv
subroutine psb_lbcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -538,6 +711,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
logical, intent(inout) :: dat
logical, allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_,info
@ -549,14 +723,24 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,MPI_LOGICAL,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,MPI_LOGICAL,root_,ictxt,info)
endif
#endif
end subroutine psb_lbcasts
subroutine psb_lbcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -566,6 +750,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
logical, intent(inout) :: dat(:)
logical, allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_,info
@ -577,16 +762,25 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),MPI_LOGICAL,root_,ictxt,info)
if (if_caf2) then
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),MPI_LOGICAL,root_,ictxt,info)
endif
#endif
end subroutine psb_lbcastv
#if !defined(LONG_INTEGERS)
subroutine psb_i8bcasts(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -596,6 +790,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_long_int_k_), intent(inout) :: dat
integer(psb_long_int_k_), allocatable :: dat_buf[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -607,12 +802,22 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_lng_integer,root_,ictxt,info)
if (if_caf2) then
if (allocated(dat_buf)) deallocate(dat_buf)
allocate(dat_buf[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,1,psb_mpi_lng_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_i8bcasts
subroutine psb_i8bcastv(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -622,6 +827,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_long_int_k_), intent(inout) :: dat(:)
integer(psb_long_int_k_), allocatable :: dat_buf(:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -632,12 +838,22 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info)
if (if_caf2) then
allocate(dat_buf(size(dat))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_i8bcastv
subroutine psb_i8bcastm(ictxt,dat,root)
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -647,6 +863,7 @@ contains
#endif
integer(psb_mpik_), intent(in) :: ictxt
integer(psb_long_int_k_), intent(inout) :: dat(:,:)
integer(psb_long_int_k_), allocatable :: dat_buf(:,:)[:]
integer(psb_mpik_), intent(in), optional :: root
integer(psb_mpik_) :: iam, np, root_, info
@ -658,8 +875,17 @@ contains
root_ = psb_root_
endif
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info)
if (if_caf2) then
allocate(dat_buf(size(dat,1),size(dat,2))[*])
dat_buf=dat
sync all
call co_broadcast(dat_buf,root_ + 1)
dat = dat_buf
if (allocated(dat_buf)) deallocate(dat_buf)
else
call psb_info(ictxt,iam,np)
call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info)
endif
#endif
end subroutine psb_i8bcastm

@ -244,6 +244,14 @@ module psi_i_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_iswapdata_multivect
subroutine psi_iswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:,:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswap_xchg_m
subroutine psi_iswapidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -253,6 +261,14 @@ module psi_i_mod
integer(psb_ipk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_iswapidxm
subroutine psi_iswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswap_xchg_v
subroutine psi_iswapidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -262,6 +278,14 @@ module psi_i_mod
integer(psb_ipk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_iswapidxv
subroutine psi_iswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswap_xchg_vect
subroutine psi_iswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -326,6 +350,14 @@ module psi_i_mod
type(psb_desc_type), target :: desc_a
integer(psb_ipk_), optional :: data
end subroutine psi_iswaptran_multivect
subroutine psi_iswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:,:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswaptran_xchg_m
subroutine psi_itranidxm(ictxt,icomm,flag,n,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -335,6 +367,14 @@ module psi_i_mod
integer(psb_ipk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_itranidxm
subroutine psi_iswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: y(:)
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswaptran_xchg_v
subroutine psi_itranidxv(ictxt,icomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import
@ -344,6 +384,14 @@ module psi_i_mod
integer(psb_ipk_),target :: work(:)
integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv
end subroutine psi_itranidxv
subroutine psi_iswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info)
import
integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag
integer(psb_ipk_), intent(out) :: info
class(psb_i_base_vect_type) :: y
integer(psb_ipk_) :: beta
class(psb_xch_idx_type), intent(inout) :: xchg
end subroutine psi_iswaptran_xchg_vect
subroutine psi_itran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,&
& totxch,totsnd,totrcv,work,info)
import

File diff suppressed because it is too large Load Diff

@ -63,7 +63,7 @@ Subroutine psb_ccdbldext(a,desc_a,novr,desc_ov,info, extype)
use psb_base_mod, psb_protect_name => psb_ccdbldext
use psi_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -467,9 +467,13 @@ Subroutine psb_ccdbldext(a,desc_a,novr,desc_ov,info, extype)
! Exchange data requests with everybody else: so far we have
! accumulated RECV requests, we have an all-to-all to build
! matchings SENDs.
!
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
!
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='mpi_alltoall')

@ -1,4 +1,5 @@
!!$
!se/tools/psb_zallc.f90
!$
!!$ Parallel Sparse BLAS version 3.4
!!$ (C) Copyright 2006, 2010, 2015
!!$ Salvatore Filippone University of Rome Tor Vergata

@ -58,7 +58,7 @@
Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,&
& rowscale,colscale,outfmt,data)
use psb_base_mod, psb_protect_name => psb_csphalo
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -195,8 +195,12 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,&
counter = counter+n_el_send+3
Enddo
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='mpi_alltoall'

@ -63,6 +63,7 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype)
use psb_base_mod, psb_protect_name => psb_dcdbldext
use psi_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
@ -468,8 +469,12 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype)
! accumulated RECV requests, we have an all-to-all to build
! matchings SENDs.
!
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='mpi_alltoall')
@ -503,9 +508,12 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype)
end if
lworkr = max(iszr,1)
end if
call mpi_alltoallv(works,sdsz,bsdindx,psb_mpi_ipk_integer,&
& workr,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
if (if_caf) then
call caf_alltoallv(works,sdsz,bsdindx, workr,rvsz,brvindx, minfo)
else
call mpi_alltoallv(works,sdsz,bsdindx,psb_mpi_ipk_integer,&
& workr,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='mpi_alltoallv')

@ -58,7 +58,7 @@
Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,&
& rowscale,colscale,outfmt,data)
use psb_base_mod, psb_protect_name => psb_dsphalo
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -195,8 +195,12 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,&
counter = counter+n_el_send+3
Enddo
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='mpi_alltoall'
@ -278,13 +282,21 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,&
goto 9999
end if
call mpi_alltoallv(valsnd,sdsz,bsdindx,psb_mpi_r_dpk_,&
& acoo%val,rvsz,brvindx,psb_mpi_r_dpk_,icomm,minfo)
call mpi_alltoallv(iasnd,sdsz,bsdindx,psb_mpi_ipk_integer,&
& acoo%ia,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
call mpi_alltoallv(jasnd,sdsz,bsdindx,psb_mpi_ipk_integer,&
& acoo%ja,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
if (if_caf) then
call caf_alltoallv(valsnd,sdsz,bsdindx, acoo%val, rvsz,&
& brvindx, minfo)
call caf_alltoallv(iasnd,sdsz,bsdindx, acoo%ia, rvsz,&
& brvindx, minfo)
call caf_alltoallv(jasnd,sdsz,bsdindx, acoo%ja, rvsz,&
& brvindx, minfo)
else
call mpi_alltoallv(valsnd,sdsz,bsdindx,psb_mpi_r_dpk_,&
& acoo%val,rvsz,brvindx,psb_mpi_r_dpk_,icomm,minfo)
call mpi_alltoallv(iasnd,sdsz,bsdindx,psb_mpi_ipk_integer,&
& acoo%ia,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
call mpi_alltoallv(jasnd,sdsz,bsdindx,psb_mpi_ipk_integer,&
& acoo%ja,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo)
endif
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='mpi_alltoallv'

@ -63,6 +63,7 @@ Subroutine psb_scdbldext(a,desc_a,novr,desc_ov,info, extype)
use psb_base_mod, psb_protect_name => psb_scdbldext
use psi_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
@ -467,9 +468,13 @@ Subroutine psb_scdbldext(a,desc_a,novr,desc_ov,info, extype)
! Exchange data requests with everybody else: so far we have
! accumulated RECV requests, we have an all-to-all to build
! matchings SENDs.
!
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
!
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1,minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='mpi_alltoall')

@ -58,7 +58,7 @@
Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,&
& rowscale,colscale,outfmt,data)
use psb_base_mod, psb_protect_name => psb_ssphalo
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -195,8 +195,12 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,&
counter = counter+n_el_send+3
Enddo
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='mpi_alltoall'

@ -63,6 +63,7 @@ Subroutine psb_zcdbldext(a,desc_a,novr,desc_ov,info, extype)
use psb_base_mod, psb_protect_name => psb_zcdbldext
use psi_mod
use psb_caf_mod
#ifdef MPI_MOD
use mpi
@ -467,9 +468,13 @@ Subroutine psb_zcdbldext(a,desc_a,novr,desc_ov,info, extype)
! Exchange data requests with everybody else: so far we have
! accumulated RECV requests, we have an all-to-all to build
! matchings SENDs.
!
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
!
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, &
& psb_mpi_def_integer,icomm,minfo)
endif
if (minfo /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='mpi_alltoall')

@ -58,7 +58,7 @@
Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,&
& rowscale,colscale,outfmt,data)
use psb_base_mod, psb_protect_name => psb_zsphalo
use psb_caf_mod
#ifdef MPI_MOD
use mpi
#endif
@ -195,8 +195,12 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,&
counter = counter+n_el_send+3
Enddo
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
if (if_caf) then
call caf_alltoall(sdsz,rvsz,1, minfo)
else
call mpi_alltoall(sdsz,1,psb_mpi_def_integer,&
& rvsz,1,psb_mpi_def_integer,icomm,minfo)
endif
if (info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='mpi_alltoall'

@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
# Libraries used
#
LIBDIR=$(BASEDIR)/lib/
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -L/opencoarray-1.7.4 -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -lcaf_mpi
LDLIBS=$(PSBLDLIBS)
FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG).

@ -1,11 +1,11 @@
11 Number of inputs
A_Z.mtx kivap005.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or
bcsstk26.mtx kivap005.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or
NONE sherman3_b.mtx http://www.cise.ufl.edu/research/sparse/matrices/index.html
MM File format: MM: Matrix Market HB: Harwell-Boeing.
CG BiCGSTAB Iterative method: BiCGSTAB CGS RGMRES BiCGSTABL BICG CG
NONE BJAC Preconditioner NONE DIAG BJAC
CSR Storage format CSR COO JAD
2 IPART: Partition method 0: BLK 2: graph (with Metis)
0 IPART: Partition method 0: BLK 2: graph (with Metis)
2 ISTOPC
00100 ITMAX
-1 ITRACE

@ -3,7 +3,7 @@ INCDIR=$(BASEDIR)/include
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
PFUNIT = /opt/pfunit/pfunit-coarrays-last
PFUNIT = /opt/pfunit/pfunit-marzo2017
FFLAGS += -I$(INCDIR) -I$(PFUNIT)/mod -ISuites
LIBDIR=$(BASEDIR)/lib
PSBLAS_LIB= -L/opencoarrays6.2 -L$(LIBDIR) -lcaf_mpi -lpsb_util -lpsb_base -llapack -lblas
@ -17,7 +17,7 @@ FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG).
all: prova
prova.x: test_psb_dmatdist.o test_psb_dhalo.o test_psb_shalo.o test_psb_chalo.o test_psb_zhalo.o test_psb_reduce_nrm2.o test_psb_sum.o test_psb_max.o test_psb_amx.o test_psb_min.o test_psb_amn.o test_psb_broadcast.o driver.o
prova.x: test_psb_dmatdist.o test_psb_ihalo.o test_psb_dhalo.o test_psb_shalo.o test_psb_chalo.o test_psb_zhalo.o test_psb_reduce_nrm2.o test_psb_sum.o test_psb_max.o test_psb_amx.o test_psb_min.o test_psb_amn.o test_psb_broadcast.o test_psb_caf.o driver.o
$(FCFLAGS)$(F90LINK) -g $(LOPT) -o $@ $^ -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -DUSE_CAF -lpfunit -lmpi
%: %.x
mpirun -np 8 ./$^

@ -1,3 +1,4 @@
ADD_TEST_SUITE(test_psb_caf_suite)
ADD_TEST_SUITE(test_psb_reduce_nrm2_suite)
ADD_TEST_SUITE(test_psb_max_suite)
ADD_TEST_SUITE(test_psb_amx_suite)
@ -5,6 +6,7 @@ ADD_TEST_SUITE(test_psb_min_suite)
ADD_TEST_SUITE(test_psb_amn_suite)
ADD_TEST_SUITE(test_psb_sum_suite)
ADD_TEST_SUITE(test_psb_broadcast_suite)
ADD_TEST_SUITE(test_psb_ihalo_suite)
ADD_TEST_SUITE(test_psb_dhalo_suite)
ADD_TEST_SUITE(test_psb_shalo_suite)
ADD_TEST_SUITE(test_psb_chalo_suite)

@ -347,7 +347,6 @@ subroutine test_psb_dbroadcast_s(this)
double precision :: dat, check
integer :: root, info, np, icontxt
call prepare_test(dat,check,root,info, np, icontxt)
print*,'from test:', this_image(), dat
call psb_bcast(icontxt, dat, root)
@assertEqual(dat,check)
call psb_exit(icontxt)

@ -2354,7 +2354,6 @@ subroutine test_psb_chalo_tran_8imgs_vect_b(this)
!GETTING BACK X
v = x%get_vect()
PRINT*,'-------', ME, V
if ((me==1).or.(me==2)) then
true = 1
else

@ -2350,7 +2350,6 @@ subroutine test_psb_dhalo_tran_8imgs_vect_b(this)
!GETTING BACK X
v = x%get_vect()
PRINT*,'-------', ME, V
if ((me==1).or.(me==2)) then
true = 1
else

@ -416,9 +416,7 @@ subroutine test_psb_imax_s(this)
Class(CafTestMethod), intent(inout) :: this
integer :: dat, check, root, info, np, icontxt
call prepare_test(dat,check,root,info, np, icontxt)
print*,'dat, check before', dat, check, this_image()
call psb_max(icontxt, dat, root)
print*,'dat after', dat, this_image()
@assertEqual(dat,check)
call psb_exit(icontxt)
end subroutine test_psb_imax_s
@ -442,9 +440,7 @@ subroutine test_psb_dmax_s(this)
double precision :: dat, check
integer :: root, info, np, icontxt
call prepare_test(dat,check,root,info, np, icontxt)
print*,'dat, check before', dat, check, this_image()
call psb_max(icontxt, dat, root)
print*,'dat after', dat, this_image()
@assertEqual(dat,check)
call psb_exit(icontxt)
end subroutine test_psb_dmax_s
@ -535,9 +531,7 @@ subroutine test2_psb_imax_s(this)
Class(CafTestMethod), intent(inout) :: this
integer :: dat, check, root, info, np, icontxt
call prepare_test2(dat,check,root,info, np, icontxt)
print*,'dat, check before', dat, check, this_image()
call psb_max(icontxt, dat, root)
print*,'dat after', dat, this_image()
@assertEqual(dat,check)
call psb_exit(icontxt)
end subroutine test2_psb_imax_s
@ -561,9 +555,7 @@ subroutine test2_psb_dmax_s(this)
double precision :: dat, check
integer :: root, info, np, icontxt
call prepare_test2(dat,check,root,info, np, icontxt)
print*,'dat, check before', dat, check, this_image()
call psb_max(icontxt, dat, root)
print*,'dat after', dat, this_image()
@assertEqual(dat,check)
call psb_exit(icontxt)
end subroutine test2_psb_dmax_s

@ -2350,7 +2350,6 @@ subroutine test_psb_shalo_tran_8imgs_vect_b(this)
!GETTING BACK X
v = x%get_vect()
PRINT*,'-------', ME, V
if ((me==1).or.(me==2)) then
true = 1
else

@ -2354,7 +2354,6 @@ subroutine test_psb_zhalo_tran_8imgs_vect_b(this)
!GETTING BACK X
v = x%get_vect()
PRINT*,'-------', ME, V
if ((me==1).or.(me==2)) then
true = 1
else

@ -4,7 +4,7 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(BASEDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -L/opencoarrays6.2 -lcaf_mpi -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
PSBLAS_LIB= -L$(LIBDIR) -L/opencoarrays6.2 -lcaf_mpi -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such

@ -136,7 +136,7 @@ program ppde2d
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size
real(psb_dpk_) :: err, eps
real(psb_dpk_), allocatable :: v(:)
! other variables
integer(psb_ipk_) :: info, i
character(len=20) :: name,ch_err
@ -216,6 +216,11 @@ program ppde2d
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
if (iam == psb_root_) then
fname="sol.mtx"
v=xxv%get_vect()
call mm_array_write(v,"exact solution", info,iunit=138, filename=fname)
endif
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='solver routine'

@ -147,6 +147,7 @@ program ppde3d
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst
integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size
real(psb_dpk_) :: err, eps
real(psb_dpk_), allocatable :: v(:)
! other variables
integer(psb_ipk_) :: info, i
@ -236,6 +237,11 @@ program ppde3d
eps = 1.d-9
call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
& itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
if (iam == psb_root_) then
fname="sol3d.mtx"
v=xxv%get_vect()
call mm_array_write(v,"exact solution", info,iunit=138, filename=fname)
endif
if(info /= psb_success_) then
info=psb_err_from_subroutine_

@ -2,8 +2,10 @@
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES
BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO JAD
700 Domain size (acutal system is this**3)
10 Domain size (acutal system is this**3)
2 Stopping criterion
0404 MAXIT
-1 ITRACE
002 IRST restart for RGMRES and BiCGSTABL

@ -1,39 +0,0 @@
BASEDIR=../..
INCDIR=$(BASEDIR)/include
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
PFUNIT = /opt/pfunit/pfunit-coarrays-last
FFLAGS += -I$(INCDIR) -I$(PFUNIT)/mod -ISuites
LIBDIR=$(BASEDIR)/lib
PSBLAS_LIB= -L/opencoarrays6.2 -L$(LIBDIR) -lcaf_mpi -lpsb_util -lpsb_base -llapack -lblas
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
CCOPT= -g
FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG).
all: test_psb_swapdata
%: %.x
mpirun -np 12 ./$^
%.x:%.o driver.o
$(FCFLAGS)$(F90LINK) -g $(LOPT) -o $@ $^ -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -DUSE_CAF -lpfunit -lmpi
#Create .F90 file
%.F90: %.pf
$(PFUNIT)/bin/pFUnitParser.py $< $@ -lmpi
#Create .o file
%.o: %.F90
$(FC) -g -DUSE_PFUNIT -DUSE_CAF -c $(FFLAGS) $(FPPFLAGS) $^ $(PFUNIT)/include/driver.F90 -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -lpfunit -lmpi
clean:
/bin/rm -f *.F90 *.o *.mod
verycleanlib:
(cd ../..; make veryclean)
lib:
(cd ../../; make library)

@ -1 +0,0 @@
ADD_TEST_SUITE(test_psb_swapdata_suite)

File diff suppressed because it is too large Load Diff

@ -83,6 +83,8 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
!
use psb_base_mod
use psb_mat_mod
use iso_fortran_env
use psb_caf_mod
implicit none
! parameters
@ -113,6 +115,14 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
integer(psb_ipk_), parameter :: nb=30
real(psb_dpk_) :: t0, t1, t2, t3, t4, t5
character(len=20) :: name, ch_err
!logical, parameter :: if_caf=.true.
!CAF variables
integer(psb_ipk_), allocatable :: b_irow(:)[:],b_icol(:)[:]
real(psb_dpk_), allocatable :: b_val(:)[:]
integer(psb_ipk_), save :: b_ll[*], b_nnr[*]
type(event_type), save :: ll_done[*], transf_done[*]
info = psb_success_
err = 0
@ -247,6 +257,94 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
! now we should insert rows i_count..j_count-1
nnr = j_count - i_count
if (if_caf) then
if (iam == root) then
ll = 0
do i= i_count, j_count-1
call a_glob%csget(i,i,nz,&
& irow,icol,val,info,nzin=ll,append=.true.)
if (info /= psb_success_) then
if (nz >min(size(irow(ll+1:)),size(icol(ll+1:)),size(val(ll+1:)))) then
write(psb_err_unit,*) 'Allocation failure? This should not happen!'
end if
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
ll = ll + nz
end do
if (allocated(b_val)) deallocate(b_val)
if (allocated(b_irow)) deallocate(b_irow)
if (allocated(b_icol)) deallocate(b_icol)
allocate(b_val(ll)[*], b_irow(ll)[*], b_icol(ll)[*])
do k_count = 1, np_sharing
iproc = iwork(k_count)
!print*,'np_sharing', np_sharing
if (iproc == iam) then
call psb_spins(ll,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_spins'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
else
b_ll[iproc + 1]=ll
b_nnr[iproc + 1]=nnr
event post(ll_done[iproc+1])
!print*,'root has post ll_done for', iproc+1
b_val(1:ll)=val(1:ll)
b_irow(1:ll)=irow(1:ll)
b_icol(1:ll)=icol(1:ll)
event post(transf_done[iproc+1])
!print*,'root has post transf_done for', iproc+1
endif
end do
else if (iam /= root) then
if (allocated(b_val)) deallocate(b_val)
if (allocated(b_irow)) deallocate(b_irow)
if (allocated(b_icol)) deallocate(b_icol)
allocate(b_val(1)[*], b_irow(1)[*], b_icol(1)[*])
do k_count = 1, np_sharing
iproc = iwork(k_count)
if (iproc == iam) then
!print*,iproc+1,' waiting for ll_done', np_sharing
event wait(ll_done)
!print*,iproc+1,'stopped waiting for ll_done'
ll = b_ll
nnr = b_nnr
if (ll > size(irow)) then
write(psb_err_unit,*) iam,'need to reallocate ',ll
deallocate(val,irow,icol)
allocate(val(ll),irow(ll),icol(ll),stat=info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='Allocate'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
endif
!print*,iproc+1,' waiting for transf_done'
event wait(transf_done)
!print*,iproc+1,'stopped waiting for transf_done'
val(1:ll)=b_val(1:ll)[root+1]
icol(1:ll)=b_icol(1:ll)[root+1]
irow(1:ll)=b_irow(1:ll)[root+1]
call psb_spins(ll,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psspins'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
endif
end do
endif
else
if (iam == root) then
ll = 0
@ -280,7 +378,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
call psb_snd(ictxt,irow(1:ll),iproc)
call psb_snd(ictxt,icol(1:ll),iproc)
call psb_snd(ictxt,val(1:ll),iproc)
call psb_rcv(ictxt,ll,iproc)
!call psb_rcv(ictxt,ll,iproc)
endif
end do
else if (iam /= root) then
@ -305,7 +403,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
call psb_rcv(ictxt,irow(1:ll),root)
call psb_rcv(ictxt,icol(1:ll),root)
call psb_rcv(ictxt,val(1:ll),root)
call psb_snd(ictxt,ll,root)
!call psb_snd(ictxt,ll,root)
call psb_spins(ll,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
@ -316,6 +414,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,&
endif
end do
endif
endif
i_count = j_count
end do

Loading…
Cancel
Save