diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90 index 13fe9c89..29332e1f 100644 --- a/base/comm/internals/psi_cswapdata.F90 +++ b/base/comm/internals/psi_cswapdata.F90 @@ -107,6 +107,7 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -135,13 +136,18 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -273,7 +279,6 @@ subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -283,11 +288,19 @@ subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -297,11 +310,12 @@ subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -759,6 +773,7 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -788,12 +803,17 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -924,7 +944,6 @@ subroutine psi_cswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -934,11 +953,19 @@ subroutine psi_cswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -948,11 +975,12 @@ subroutine psi_cswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -1365,6 +1393,7 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -1394,12 +1423,17 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90 index b20c807c..2a81d241 100644 --- a/base/comm/internals/psi_cswaptran.F90 +++ b/base/comm/internals/psi_cswaptran.F90 @@ -111,6 +111,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -141,12 +142,17 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -246,7 +252,7 @@ subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -277,7 +283,6 @@ subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -287,11 +292,19 @@ subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -301,9 +314,9 @@ subroutine psi_cswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -761,6 +774,7 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -790,12 +804,17 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -896,7 +915,7 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -927,7 +946,6 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -937,11 +955,19 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -951,9 +977,9 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -1386,6 +1412,7 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -1415,12 +1442,17 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 3a0e458b..08f33d69 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -107,6 +107,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -135,13 +136,18 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -182,9 +188,9 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswap_xchg_m' info=psb_success_ - name='psi_swap_datav' + name='psi_swap_xchg_m' + print*,me call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -273,7 +279,6 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -283,11 +288,19 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -297,11 +310,12 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -759,6 +773,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -788,12 +803,17 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -835,7 +855,8 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) character(len=20) :: name info=psb_success_ - name='psi_swap_datav' + name='psi_swap_xchg_v' + print*, name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -924,7 +945,6 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -934,11 +954,19 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -948,11 +976,12 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -1365,6 +1394,7 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -1394,12 +1424,17 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -1443,6 +1478,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) info=psb_success_ name='psi_xchg_vect' + print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -1490,13 +1526,16 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) if (last_clear_count>0) & & event wait(clear,until_count=last_clear_count) end if + me = this_image() if (psb_size(buffer) < xchg%max_buffer_size) then ! ! By construction, max_buffer_size was computed with a collective. ! if (allocated(buffer)) deallocate(buffer) !write(*,*) 'Allocating buffer',xchg%max_buffer_size + print*,'allocating buffer', me allocate(buffer(xchg%max_buffer_size)[*],stat=info) + print*,'buffer allocated', me if (allocated(sndbuf)) deallocate(sndbuf) if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) if (info /= 0) then diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90 index afdf3e34..b439555e 100644 --- a/base/comm/internals/psi_dswaptran.F90 +++ b/base/comm/internals/psi_dswaptran.F90 @@ -111,8 +111,8 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) - integer(psb_ipk_) :: ierr(5) class(psb_xch_idx_type), pointer :: d_xchg + integer(psb_ipk_) :: ierr(5) character(len=20) :: name info=psb_success_ @@ -148,9 +148,9 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) goto 9999 end if if (.false.) then - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) endif if (info /= psb_success_) goto 9999 @@ -193,9 +193,9 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswaptran_xchg_m' info=psb_success_ name='psi_swaptran_datam' + print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -253,7 +253,7 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -284,7 +284,6 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -294,11 +293,19 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -308,9 +315,9 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -803,11 +810,12 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then - call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else - call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) - end if + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif + if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -850,7 +858,8 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) print*,' call psi_dswaptran_xchg_v' info=psb_success_ - name='psi_swap_datav' + name='psi_swaptran_xchg_v' + print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -908,7 +917,7 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -939,7 +948,6 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -949,11 +957,19 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -963,9 +979,9 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -1398,13 +1414,12 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ class(psb_i_base_vect_type), pointer :: d_vidx - integer(psb_ipk_) :: ierr(5) class(psb_xch_idx_type), pointer :: d_xchg + integer(psb_ipk_) :: ierr(5) character(len=20) :: name info=psb_success_ - print*,'calling psi_tran_xch_vect' - name='psi_tran_xch_vect' + name='psi_swap_tranv' call psb_erractionsave(err_act) ictxt = desc_a%get_context() @@ -1434,11 +1449,11 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then - call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else - call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) - end if + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif if (info /= psb_success_) goto 9999 @@ -1483,6 +1498,7 @@ subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) info=psb_success_ name='psi_tran_xchg_vect' + print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -1632,6 +1648,7 @@ subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) end subroutine psi_dswaptran_xchg_vect + ! ! ! Subroutine: psi_dtran_vidx_vect diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90 index 74908183..b5421fa9 100644 --- a/base/comm/internals/psi_sswapdata.F90 +++ b/base/comm/internals/psi_sswapdata.F90 @@ -107,6 +107,7 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -135,13 +136,18 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -182,6 +188,7 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name + print*,' call psi_sswap_xchg_m' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -272,7 +279,6 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -282,11 +288,19 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -296,11 +310,12 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -758,6 +773,7 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -787,12 +803,17 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -923,7 +944,6 @@ subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -933,11 +953,19 @@ subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -947,11 +975,12 @@ subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -1364,6 +1393,7 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -1393,12 +1423,17 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90 index 083dfc6b..917e9147 100644 --- a/base/comm/internals/psi_sswaptran.F90 +++ b/base/comm/internals/psi_sswaptran.F90 @@ -111,6 +111,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -141,12 +142,17 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -246,7 +252,7 @@ subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -277,7 +283,6 @@ subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -287,11 +292,19 @@ subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -301,9 +314,9 @@ subroutine psi_sswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -761,6 +774,7 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -790,12 +804,17 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -896,7 +915,7 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -927,7 +946,6 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -937,11 +955,19 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -951,9 +977,9 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -1386,6 +1412,7 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -1415,12 +1442,17 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90 index fe84450a..8c4da6a6 100644 --- a/base/comm/internals/psi_zswapdata.F90 +++ b/base/comm/internals/psi_zswapdata.F90 @@ -107,6 +107,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -135,13 +136,18 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -273,7 +279,6 @@ subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -283,11 +288,19 @@ subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -297,11 +310,12 @@ subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -759,6 +773,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -788,12 +803,17 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -924,7 +944,6 @@ subroutine psi_zswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -934,11 +953,19 @@ subroutine psi_zswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_snd_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -948,11 +975,12 @@ subroutine psi_zswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do + last_clear_count = nxch endif @@ -1365,6 +1393,7 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -1394,12 +1423,17 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90 index 1d60970f..103f5a3d 100644 --- a/base/comm/internals/psi_zswaptran.F90 +++ b/base/comm/internals/psi_zswaptran.F90 @@ -111,6 +111,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -141,12 +142,17 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -246,7 +252,7 @@ subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -277,7 +283,6 @@ subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -287,11 +292,19 @@ subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -301,9 +314,9 @@ subroutine psi_zswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -761,6 +774,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -790,12 +804,17 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -896,7 +915,7 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end if end if - if (.true.) then + if (.false.) then nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -927,7 +946,6 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) end do last_clear_count = nxch else - !sync all nxch = size(xchg%prcs_xch) myself = this_image() do ip = 1, nxch @@ -937,11 +955,19 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) rp1 = xchg%rmt_snd_bnd(ip,1) rp2 = xchg%rmt_snd_bnd(ip,2) isz = p2-p1+1 - !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),& - & y,buffer(p1:p2)) + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 event post(ufg(myself)[img]) end do + do ip = 1, nxch img = xchg%prcs_xch(ip) + 1 event wait(ufg(img)) @@ -951,9 +977,9 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) isz = p2-p1+1 rp1 = xchg%rmt_rcv_bnd(ip,1) rp2 = xchg%rmt_rcv_bnd(ip,2) - !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 call psi_sct(isz,xchg%loc_snd_idx(p1:p2),& - & buffer(rp1:rp2)[img],beta,y) + & buffer(p1:p2),beta, y) event post(clear[img]) end do last_clear_count = nxch @@ -1386,6 +1412,7 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -1415,12 +1442,17 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.false.) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) diff --git a/base/modules/psb_error_impl.F90 b/base/modules/psb_error_impl.F90 index b1ed0735..9020a90a 100644 --- a/base/modules/psb_error_impl.F90 +++ b/base/modules/psb_error_impl.F90 @@ -19,7 +19,7 @@ subroutine psb_ser_error_handler(err_act) if (err_act /= psb_act_ret_) & & call psb_error() - if (err_act == psb_act_abort_) stop + ! if (err_act == psb_act_abort_) stop return end subroutine psb_ser_error_handler diff --git a/base/modules/psi_penv_mod.F90 b/base/modules/psi_penv_mod.F90 index d2d62a45..d8388c10 100644 --- a/base/modules/psi_penv_mod.F90 +++ b/base/modules/psi_penv_mod.F90 @@ -300,7 +300,7 @@ contains if (info == mpi_success) call mpi_init(info) if (info /= mpi_success) then write(psb_err_unit,*) 'Error in initalizing MPI, bailing out',info - stop + ! stop end if end if @@ -486,7 +486,7 @@ contains integer(psb_mpik_) :: code, info #if defined(SERIAL_MPI) - stop + !stop #else if (present(errc)) then code = errc diff --git a/test/integrationTest/test_psb_halo.pf b/test/integrationTest/test_psb_halo.pf index 737812cb..609bbfbe 100644 --- a/test/integrationTest/test_psb_halo.pf +++ b/test/integrationTest/test_psb_halo.pf @@ -3,11 +3,625 @@ use pfunit_mod use psb_base_mod implicit none include 'mpif.h' +contains + +@test(nimgs=[std]) +subroutine test_psb_halotran_m_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: v(:), y(:,:), check(:) + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + call psb_geaxpby(dble(me),x,0.0d0,x,desc_a,info) + !x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + v = x%get_vect() + allocate(y(size(v,1),1)) + y(:,1)=v + call psb_halo(y, desc_a, info, tran='T') + !GETTING BACK X + call psb_barrier(icontxt) + + + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid-1)=1.0d0 + check(mid)=2.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1)=6.0d0 + check(mid-1:mid)=4.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y(:,1))) + deallocate(vg,ia,val,v,y,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_halotran_m_2imgs + +@test(nimgs=[std]) +subroutine test_psb_halotran_v_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: v(:), check(:) + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + call psb_geaxpby(dble(me),x,0.0d0,x,desc_a,info) + !x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + v = x%get_vect() + call psb_halo(v, desc_a, info, tran='T') + !GETTING BACK X + call psb_barrier(icontxt) + + + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid-1)=1.0d0 + check(mid)=2.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1)=6.0d0 + check(mid-1:mid)=4.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*v)) + deallocate(vg,ia,val,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_halotran_v_2imgs + +@test(nimgs=[std]) +subroutine test_psb_halotran_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: v(:), check(:) + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + call psb_geaxpby(dble(me),x,0.0d0,x,desc_a,info) + !x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + call psb_barrier(icontxt) + v = x%get_vect() + ! END OF SETUP + + call psb_halo(x, desc_a, info, tran='T') + !GETTING BACK X + call psb_barrier(icontxt) + v = x%get_vect() + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid-1)=1.0d0 + check(mid)=2.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1)=6.0d0 + check(mid-1:mid)=4.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) -contains + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*v)) + deallocate(vg,ia,val,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_halotran_2imgs + + +@test(nimgs=[std]) +subroutine test_psb_halom_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: y(:,:),v(:), check(:) + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + v(mid +1)=v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + allocate(y(size(v,1),1)) + y(:,1)=v + call psb_halo(y, desc_a, info) + !GETTING BACK X + call psb_barrier(icontxt) + + + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1:mid)=2.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y(:,1))) + deallocate(vg,ia,val,v,y,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_halom_2imgs + +@test(nimgs=[std]) +subroutine test_psb_halov_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: v(:), check(:) + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + v = x%get_vect() + call psb_halo(v, desc_a, info) + !GETTING BACK X + call psb_barrier(icontxt) + + + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1:mid)=2.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*v)) + deallocate(vg,ia,val,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_halov_2imgs @test(nimgs=[std]) -subroutine test_psb_halo_2imgs(this) +subroutine test_psb_halovect_2imgs(this) implicit none Class(CafTestMethod), intent(inout) :: this integer :: msg, me, i=0, np, j, info @@ -125,7 +739,7 @@ subroutine test_psb_halo_2imgs(this) call psb_exit(icontxt) -end subroutine test_psb_halo_2imgs +end subroutine test_psb_halovect_2imgs diff --git a/test/pargen/Makefile b/test/pargen/Makefile index f9f44088..f462f0f0 100644 --- a/test/pargen/Makefile +++ b/test/pargen/Makefile @@ -4,7 +4,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(BASEDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -L/opencoarrays6.2 -lcaf_mpi -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such @@ -19,21 +19,21 @@ all: ppde3d spde3d ppde2d spde2d ppde3d: ppde3d.o - $(F90LINK) ppde3d.o -o ppde3d $(PSBLAS_LIB) $(LDLIBS) + $(F90LINK) -fcoarray=lib ppde3d.o -o ppde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv ppde3d $(EXEDIR) spde3d: spde3d.o - $(F90LINK) spde3d.o -o spde3d $(PSBLAS_LIB) $(LDLIBS) + $(F90LINK) -fcoarray=lib spde3d.o -o spde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv spde3d $(EXEDIR) ppde2d: ppde2d.o - $(F90LINK) ppde2d.o -o ppde2d $(PSBLAS_LIB) $(LDLIBS) + $(F90LINK) -fcoarray=lib ppde2d.o -o ppde2d $(PSBLAS_LIB) $(LDLIBS) /bin/mv ppde2d $(EXEDIR) spde2d: spde2d.o - $(F90LINK) spde2d.o -o spde2d $(PSBLAS_LIB) $(LDLIBS) + $(F90LINK) -fcoarray=lib spde2d.o -o spde2d $(PSBLAS_LIB) $(LDLIBS) /bin/mv spde2d $(EXEDIR) diff --git a/test/pargen/ppde2d.f90 b/test/pargen/ppde2d.f90 index 5f072f3c..6569fef0 100644 --- a/test/pargen/ppde2d.f90 +++ b/test/pargen/ppde2d.f90 @@ -65,13 +65,13 @@ contains use psb_base_mod, only : psb_dpk_ real(psb_dpk_) :: b1 real(psb_dpk_), intent(in) :: x,y - b1=1.d0/sqrt(2.d0) + b1=0.d0/sqrt(2.d0) end function b1 function b2(x,y) use psb_base_mod, only : psb_dpk_ real(psb_dpk_) :: b2 real(psb_dpk_), intent(in) :: x,y - b2=1.d0/sqrt(2.d0) + b2=0.d0/sqrt(2.d0) end function b2 function c(x,y) use psb_base_mod, only : psb_dpk_ @@ -83,13 +83,13 @@ contains use psb_base_mod, only : psb_dpk_ real(psb_dpk_) :: a1 real(psb_dpk_), intent(in) :: x,y - a1=1.d0/80 + a1=1.d0 end function a1 function a2(x,y) use psb_base_mod, only : psb_dpk_ real(psb_dpk_) :: a2 real(psb_dpk_), intent(in) :: x,y - a2=1.d0/80 + a2=1.d0 end function a2 function g(x,y) use psb_base_mod, only : psb_dpk_, done, dzero @@ -151,7 +151,7 @@ program ppde2d if (iam < 0) then ! This should not happen, but just in case call psb_exit(ictxt) - stop + !stop endif if(psb_get_errstatus() /= 0) goto 9999 name='pde2d90' @@ -263,11 +263,11 @@ program ppde2d end if call psb_exit(ictxt) - stop + !stop 9999 call psb_error(ictxt) - stop + !stop contains ! @@ -322,7 +322,7 @@ contains ! wrong number of parameter, print an error message and exit call pr_usage(izero) call psb_abort(ictxt) - stop 1 + !stop 1 endif end if diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index c91fb2d2..85b46bdd 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,10 +2,8 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO JAD -100 Domain size (acutal system is this**3) +700 Domain size (acutal system is this**3) 2 Stopping criterion 0404 MAXIT -1 ITRACE 002 IRST restart for RGMRES and BiCGSTABL - - diff --git a/test/unitTest/test_psb_swapdata.pf b/test/unitTest/test_psb_swapdata.pf index b81bf345..3f2678e9 100644 --- a/test/unitTest/test_psb_swapdata.pf +++ b/test/unitTest/test_psb_swapdata.pf @@ -3,9 +3,530 @@ use pfunit_mod use psb_base_mod implicit none include 'mpif.h' +interface +subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_sswap_xchg_v + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_s_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + real(psb_spk_) :: y(:) + real(psb_spk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + +end subroutine psi_sswap_xchg_v + +subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_sswap_xchg_m + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_s_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m + integer(psb_ipk_), intent(out) :: info + real(psb_spk_) :: y(:,:) + real(psb_spk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + +end subroutine psi_sswap_xchg_m +subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_dswap_xchg_v + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_d_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_) :: y(:) + real(psb_dpk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + +end subroutine psi_dswap_xchg_v +end interface + +interface +subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_dswap_xchg_m + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_d_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_) :: y(:,:) + real(psb_dpk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name +end subroutine psi_dswap_xchg_m +end interface contains +@test(nimgs=[std]) +subroutine test_psb_sswapdatav_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_spk_), allocatable :: val(:) + real(psb_spk_), allocatable :: y(:), check(:) + class(psb_xch_idx_type), pointer :: xchg + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_s_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(y(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + y = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + y(mid +1)=y(mid+1) + 2.0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + + iictxt = desc_a%get_context() + + icomm = desc_a%get_mpic() + + call desc_a%get_list(psb_comm_halo_,xchg,info) + + flag = IOR(psb_swap_send_, psb_swap_recv_) + + call psi_sswap_xchg_v(iictxt,icomm,flag,0.0,y,xchg,info) + !GETTING BACK X + call psb_barrier(icontxt) + + !Let's build the expected solution + if (allocated(check)) deallocate(check) + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0 + check(mid + 1)=2.0 + else if (me == 2) then + check(1:mid)=2.0 + check(mid + 1)=1.0 + else + check(1)=0.0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y)) + deallocate(vg,ia,val,y,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_sswapdatav_2imgs + +@test(nimgs=[std]) +subroutine test_psb_swapdatam_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: y(:,:), check(:), v(:) + class(psb_xch_idx_type), pointer :: xchg + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + allocate(y(size(v,1),1)) + y(:,1)=v + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + y(mid +1,1)=y(mid+1,1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + + iictxt = desc_a%get_context() + + icomm = desc_a%get_mpic() + + call desc_a%get_list(psb_comm_halo_,xchg,info) + + flag = IOR(psb_swap_send_, psb_swap_recv_) + + call psi_dswap_xchg_m(iictxt,icomm,flag,1,0.0d0,y,xchg,info) + !GETTING BACK X + call psb_barrier(icontxt) + + !Let's build the expected solution + if (allocated(check)) deallocate(check) + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1:mid)=2.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y(:,1))) + deallocate(vg,ia,val,y,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_swapdatam_2imgs + +@test(nimgs=[std]) +subroutine test_psb_swapdatav_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: y(:), check(:) + class(psb_xch_idx_type), pointer :: xchg + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(y(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + y = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + y(mid +1)=y(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + + iictxt = desc_a%get_context() + + icomm = desc_a%get_mpic() + + call desc_a%get_list(psb_comm_halo_,xchg,info) + + flag = IOR(psb_swap_send_, psb_swap_recv_) + + call psi_dswap_xchg_v(iictxt,icomm,flag,0.0d0,y,xchg,info) + !GETTING BACK X + call psb_barrier(icontxt) + + !Let's build the expected solution + if (allocated(check)) deallocate(check) + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1:mid)=2.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y)) + deallocate(vg,ia,val,y,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_swapdatav_2imgs + @test(nimgs=[std]) subroutine test_psb_swapdata_2imgs(this) implicit none @@ -837,5 +1358,274 @@ subroutine test_psb_swapdata_8imgs_b(this) end subroutine test_psb_swapdata_8imgs_b + +@test(nimgs=[std]) +subroutine test_psb_swapdatatran_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_), allocatable :: v(:), check(:) + class(psb_xch_idx_type), pointer :: xchg + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_d_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + !v = x%get_vect() + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + call psb_geaxpby(dble(me),x,0.0d0,x,desc_a,info) + !x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + v = x%get_vect() + + + iictxt = desc_a%get_context() + + icomm = desc_a%get_mpic() + + call desc_a%get_list(psb_comm_halo_,xchg,info) + + flag = IOR(psb_swap_send_, psb_swap_recv_) + sync all + + call psi_dswaptran_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) + !GETTING BACK X + call psb_barrier(icontxt) + v = x%get_vect() + sync all + + !Let's build the expected solution + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 )then + check(1:mid)=1.0d0 + check(mid + 1)=2.0d0 + else if (me == 2) then + check(1)=2.0d0 + check(mid-1:mid)=4.0d0 + check(mid + 1)=1.0d0 + else + check(1)=0.0d0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + @assertEqual(real(true*check),real(true*v)) + deallocate(vg,ia,val,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_swapdatatran_2imgs + +@test(nimgs=[std]) +subroutine test_psb_sswapdatam_2imgs(this) + implicit none + Class(CafTestMethod), intent(inout) :: this + integer :: msg, me, i=0, np, j, info + integer, parameter :: nrows=6 + integer :: icontxt, mid, true + integer, allocatable :: vg(:), ia(:) + real(psb_spk_), allocatable :: val(:) + real(psb_spk_), allocatable :: y(:,:), check(:), v(:) + class(psb_xch_idx_type), pointer :: xchg + integer(psb_ipk_) :: iictxt, icomm, flag + type(psb_desc_type):: desc_a + type(psb_s_vect_type) :: x + + np = this%getNumImages() + if (np < 2) then + print*,'You need at least 2 processes to run this test.' + return + endif + call psb_init(icontxt,np,MPI_COMM_WORLD) + !call psb_info(icontxt, me, np) + me = this_image() + !Allocate vectors + allocate(vg(nrows)) + allocate(ia(nrows)) + allocate(val(nrows)) + allocate(v(nrows)) + i = 0 + do j=1,size(vg,1) + vg(j)= i + i = i+1 + if (i==np) then + i=0 + endif + enddo + + + !Use only 2 processes + !Assuming nrows is a multiple of 2 so mid is an integer + !Distribute equally to the two processes + mid=nrows/2 + + do i=1, mid + vg(i)=0 + enddo + do i=mid+1, nrows + vg(i)=1 + enddo + + + do i=1,size(ia,1) + ia(i)=i + enddo + + do i=1,mid + val(i)=1. + enddo + + do i=mid + 1,nrows + val(i)=2. + enddo + + call psb_cdall(icontxt,desc_a,info, vg=vg) + if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) + if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) + call psb_cdasb(desc_a, info) + + call psb_geall(x,desc_a,info) + call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) + call psb_geasb(x,desc_a,info) + + call psb_barrier(icontxt) + v = x%get_vect() + allocate(y(size(v,1),1)) + y(:,1)=v + !Let's modify x, so we need to update halo indices + + if ((me == 1).or.(me == 2)) then + y(mid +1,1)=y(mid+1,1) + 2.0 + endif + call psb_barrier(icontxt) + + ! END OF SETUP + + + iictxt = desc_a%get_context() + + icomm = desc_a%get_mpic() + + call desc_a%get_list(psb_comm_halo_,xchg,info) + + flag = IOR(psb_swap_send_, psb_swap_recv_) + + print*,'size of y', size(y,1), size(y,2) + call psi_sswap_xchg_m(iictxt,icomm,flag,1,0.0,y,xchg,info) + !GETTING BACK X + call psb_barrier(icontxt) + + !Let's build the expected solution + if (allocated(check)) deallocate(check) + if ((me == 1).or.(me==2)) then + allocate(check(mid+1)) + else + allocate(check(1)) + endif + if (me == 1 ) then + check(1:mid)=1.0 + check(mid + 1)=2.0 + else if (me == 2) then + check(1:mid)=2.0 + check(mid + 1)=1.0 + else + check(1)=0.0 + endif + !call psb_barrier(icontxt) + + if ((me==1).or.(me==2)) then + true = 1 + else + true=0 + endif + + @assertEqual(real(true*check),real(true*y(:,1))) + deallocate(vg,ia,val,y,v,check) + + call psb_gefree(x, desc_a, info) + call psb_cdfree(desc_a, info) + + call psb_exit(icontxt) + +end subroutine test_psb_sswapdatam_2imgs + + end module test_psb_swapdata