From bcc9a0f39ac136d394b77f1629e74dbb3e4305c9 Mon Sep 17 00:00:00 2001 From: Ambra Abdullahi Date: Wed, 22 Mar 2017 21:11:54 +0000 Subject: [PATCH] Added CAF collective subroutines and matdist --- base/comm/internals/psi_cswapdata.F90 | 10 +- base/comm/internals/psi_cswaptran.F90 | 10 +- base/comm/internals/psi_dswapdata.F90 | 14 +- base/comm/internals/psi_dswaptran.F90 | 13 +- base/comm/internals/psi_iswapdata.F90 | 548 ++- base/comm/internals/psi_iswaptran.F90 | 544 ++- base/comm/internals/psi_sswapdata.F90 | 10 +- base/comm/internals/psi_sswaptran.F90 | 10 +- base/comm/internals/psi_zswapdata.F90 | 10 +- base/comm/internals/psi_zswaptran.F90 | 10 +- base/internals/Makefile | 2 +- base/internals/psb_indx_map_fnd_owner.F90 | 36 +- base/internals/psi_desc_impl.f90 | 42 +- base/internals/psi_desc_index.F90 | 21 +- base/internals/psi_extrct_dl.F90 | 10 +- base/modules/Makefile | 11 +- base/modules/desc/psb_desc_mod.F90 | 2 +- base/modules/psi_bcast_mod.F90 | 324 +- base/modules/psi_i_mod.f90 | 48 + base/modules/psi_reduce_mod.F90 | 4168 ++++++++++++++------ base/tools/psb_ccdbldext.F90 | 12 +- base/tools/psb_cdalv.f90 | 3 +- base/tools/psb_csphalo.F90 | 10 +- base/tools/psb_dcdbldext.F90 | 18 +- base/tools/psb_dsphalo.F90 | 32 +- base/tools/psb_scdbldext.F90 | 11 +- base/tools/psb_ssphalo.F90 | 10 +- base/tools/psb_zcdbldext.F90 | 11 +- base/tools/psb_zsphalo.F90 | 10 +- test/fileread/Makefile | 2 +- test/fileread/runs/dfs.inp | 4 +- test/integrationTest/Makefile | 4 +- test/integrationTest/Suites/testSuites.inc | 2 + test/integrationTest/test_psb_broadcast.pf | 1 - test/integrationTest/test_psb_chalo.pf | 1 - test/integrationTest/test_psb_dhalo.pf | 1 - test/integrationTest/test_psb_max.pf | 8 - test/integrationTest/test_psb_shalo.pf | 1 - test/integrationTest/test_psb_zhalo.pf | 1 - test/pargen/Makefile | 2 +- test/pargen/ppde2d.f90 | 7 +- test/pargen/ppde3d.f90 | 6 + test/pargen/runs/ppde.inp | 4 +- test/unitTest/Makefile | 39 - test/unitTest/Suites/testSuites.inc | 1 - test/unitTest/test_psb_swapdata.pf | 1631 -------- util/psb_d_mat_dist_impl.f90 | 103 +- 47 files changed, 4780 insertions(+), 2998 deletions(-) delete mode 100644 test/unitTest/Makefile delete mode 100644 test/unitTest/Suites/testSuites.inc delete mode 100644 test/unitTest/test_psb_swapdata.pf diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90 index 29332e1f..fe7f01e2 100644 --- a/base/comm/internals/psi_cswapdata.F90 +++ b/base/comm/internals/psi_cswapdata.F90 @@ -89,6 +89,7 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -142,7 +143,7 @@ subroutine psi_cswapdatam(flag,n,beta,y,desc_a,work,info,data) goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -188,7 +189,6 @@ subroutine psi_cswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_cswap_xchg_m' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -755,6 +755,7 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -808,7 +809,7 @@ subroutine psi_cswapdatav(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -1374,6 +1375,7 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1428,7 +1430,7 @@ subroutine psi_cswapdata_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90 index 2a81d241..453c742f 100644 --- a/base/comm/internals/psi_cswaptran.F90 +++ b/base/comm/internals/psi_cswaptran.F90 @@ -93,6 +93,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -147,7 +148,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -756,6 +757,7 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -809,7 +811,7 @@ subroutine psi_cswaptranv(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -855,7 +857,6 @@ subroutine psi_cswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswaptran_xchg_v' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -1393,6 +1394,7 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1447,7 +1449,7 @@ subroutine psi_cswaptran_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90 index 08f33d69..7231ac43 100644 --- a/base/comm/internals/psi_dswapdata.F90 +++ b/base/comm/internals/psi_dswapdata.F90 @@ -89,6 +89,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -142,7 +143,7 @@ subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info,data) goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -190,7 +191,6 @@ subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) info=psb_success_ name='psi_swap_xchg_m' - print*,me call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -755,6 +755,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -808,7 +809,7 @@ subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -856,7 +857,6 @@ subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) info=psb_success_ name='psi_swap_xchg_v' - print*, name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -1375,6 +1375,7 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1429,7 +1430,7 @@ subroutine psi_dswapdata_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -1478,7 +1479,6 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) info=psb_success_ name='psi_xchg_vect' - print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -1533,9 +1533,7 @@ subroutine psi_dswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) ! if (allocated(buffer)) deallocate(buffer) !write(*,*) 'Allocating buffer',xchg%max_buffer_size - print*,'allocating buffer', me allocate(buffer(xchg%max_buffer_size)[*],stat=info) - print*,'buffer allocated', me if (allocated(sndbuf)) deallocate(sndbuf) if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) if (info /= 0) then diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90 index b439555e..4a454bbd 100644 --- a/base/comm/internals/psi_dswaptran.F90 +++ b/base/comm/internals/psi_dswaptran.F90 @@ -93,6 +93,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -147,7 +148,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -195,7 +196,6 @@ subroutine psi_dswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) info=psb_success_ name='psi_swaptran_datam' - print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -757,6 +757,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -810,7 +811,7 @@ subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -856,10 +857,8 @@ subroutine psi_dswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswaptran_xchg_v' info=psb_success_ name='psi_swaptran_xchg_v' - print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm @@ -1395,6 +1394,7 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1449,7 +1449,7 @@ subroutine psi_dswaptran_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -1498,7 +1498,6 @@ subroutine psi_dswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) info=psb_success_ name='psi_tran_xchg_vect' - print*,name call psb_erractionsave(err_act) ictxt = iictxt icomm = iicomm diff --git a/base/comm/internals/psi_iswapdata.F90 b/base/comm/internals/psi_iswapdata.F90 index 31afc78b..9e1ff10f 100644 --- a/base/comm/internals/psi_iswapdata.F90 +++ b/base/comm/internals/psi_iswapdata.F90 @@ -89,6 +89,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -107,6 +108,7 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -134,14 +136,19 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data) data_ = psb_comm_halo_ end if - call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.not.(if_caf2)) then + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -152,6 +159,174 @@ subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_iswapdatam +subroutine psi_iswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswap_xchg_m + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:,:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_swap_datav' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect') + goto 9999 + end if + + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + if (allocated(sndbuf)) deallocate(sndbuf) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + + if (.false.) then + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y) + event post(clear[img]) + + end do + last_clear_count = nxch + else + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + call psi_gth(isz,m,xchg%loc_snd_idx(p1:p2),& + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call psi_sct(isz,m,xchg%loc_rcv_idx(p1:p2),& + & buffer(p1:p2),beta, y) + event post(clear[img]) + end do + + last_clear_count = nxch + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psi_iswap_xchg_m + subroutine psi_iswapidxm(iictxt,iicomm,flag,n,beta,y,idx, & & totxch,totsnd,totrcv,work,info) @@ -580,6 +755,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -598,6 +774,7 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -626,13 +803,20 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data) data_ = psb_comm_halo_ end if + call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + if (.not.(if_caf2)) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif + if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -644,6 +828,173 @@ subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info,data) end subroutine psi_iswapdatav +subroutine psi_iswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswap_xchg_v + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_swap_datav' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect') + goto 9999 + end if + + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + if (allocated(sndbuf)) deallocate(sndbuf) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + + if (.false.) then + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call psi_gth(isz,xchg%loc_snd_idx(p1:p2),y,buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta,y) + event post(clear[img]) + + end do + last_clear_count = nxch + else + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + call psi_gth(isz,xchg%loc_snd_idx(p1:p2),& + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call psi_sct(isz,xchg%loc_rcv_idx(p1:p2),& + & buffer(p1:p2),beta, y) + event post(clear[img]) + end do + + last_clear_count = nxch + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psi_iswap_xchg_v ! ! @@ -1026,6 +1377,7 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1045,6 +1397,7 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, data_, err_act class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg character(len=20) :: name info=psb_success_ @@ -1074,12 +1427,18 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + if (.not.(if_caf2)) then + call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) + endif + if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -1090,6 +1449,187 @@ subroutine psi_iswapdata_vect(flag,beta,y,desc_a,work,info,data) return end subroutine psi_iswapdata_vect +subroutine psi_iswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswap_xchg_vect + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_i_base_vect_type) :: y + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_xchg_vect' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (np /= num_images()) then + write(*,*) 'Something is wrong MPI vs CAF ', np, num_images() + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Num_images /= np') + goto 9999 + end if + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect') + goto 9999 + end if + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + me = this_image() + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size, me + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (allocated(sndbuf)) deallocate(sndbuf) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + if (.false.) then + !sync all + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img],beta) + event post(clear[img]) + + end do + last_clear_count = nxch + + else + + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2 + if (.false.) then + call y%gth(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img]) + else + call y%gth(isz,xchg%loc_snd_idx(p1:p2),sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end if + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call y%sct(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2),beta) + event post(clear[img]) + end do + + last_clear_count = nxch + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return + +end subroutine psi_iswap_xchg_vect + ! ! diff --git a/base/comm/internals/psi_iswaptran.F90 b/base/comm/internals/psi_iswaptran.F90 index 6c9907bb..2ad78102 100644 --- a/base/comm/internals/psi_iswaptran.F90 +++ b/base/comm/internals/psi_iswaptran.F90 @@ -93,6 +93,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -111,6 +112,7 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, err_act, totxch, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -141,12 +143,17 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if + if (.not.(if_caf2)) then + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) + endif - call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -157,6 +164,174 @@ subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info,data) return end subroutine psi_iswaptranm +subroutine psi_iswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswaptran_xchg_m + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:,:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_swaptran_datam' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in tran_xchg_vm') + goto 9999 + end if + + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + if (allocated(sndbuf)) deallocate(sndbuf) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + + if (.false.) then + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y) + event post(clear[img]) + + end do + last_clear_count = nxch + else + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + call psi_gth(isz,m,xchg%loc_rcv_idx(p1:p2),& + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call psi_sct(isz,m,xchg%loc_snd_idx(p1:p2),& + & buffer(p1:p2),beta, y) + event post(clear[img]) + end do + last_clear_count = nxch + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psi_iswaptran_xchg_m + + subroutine psi_itranidxm(iictxt,iicomm,flag,n,beta,y,idx,totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_itranidxm @@ -583,6 +758,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -601,6 +777,7 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ integer(psb_ipk_), pointer :: d_idx(:) + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -630,12 +807,18 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_idx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + if (.not.(if_caf2)) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif + if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -646,6 +829,172 @@ subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info,data) return end subroutine psi_iswaptranv +subroutine psi_iswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswaptran_xchg_v + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_swap_datav' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect') + goto 9999 + end if + + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + if (allocated(sndbuf)) deallocate(sndbuf) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + + if (.false.) then + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),y,buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call psi_sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta,y) + event post(clear[img]) + + end do + last_clear_count = nxch + else + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + call psi_gth(isz,xchg%loc_rcv_idx(p1:p2),& + & y,sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call psi_sct(isz,xchg%loc_snd_idx(p1:p2),& + & buffer(p1:p2),beta, y) + event post(clear[img]) + end do + last_clear_count = nxch + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psi_iswaptran_xchg_v ! ! @@ -658,7 +1007,6 @@ end subroutine psi_iswaptranv ! ! - subroutine psi_itranidxv(iictxt,iicomm,flag,beta,y,idx,totxch,totsnd,totrcv,work,info) use psi_mod, psb_protect_name => psi_itranidxv @@ -1047,6 +1395,7 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1066,6 +1415,7 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) ! locals integer(psb_ipk_) :: ictxt, np, me, icomm, idxs, idxr, totxch, err_act, data_ class(psb_i_base_vect_type), pointer :: d_vidx + class(psb_xch_idx_type), pointer :: d_xchg integer(psb_ipk_) :: ierr(5) character(len=20) :: name @@ -1095,12 +1445,18 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) end if call desc_a%get_list(data_,d_vidx,totxch,idxr,idxs,info) + if (info == 0) call desc_a%get_list(data_,d_xchg,info) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + if (.not.(if_caf2)) then + call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) + else + call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) + endif + if (info /= psb_success_) goto 9999 call psb_erractionrestore(err_act) @@ -1112,7 +1468,187 @@ subroutine psi_iswaptran_vect(flag,beta,y,desc_a,work,info,data) end subroutine psi_iswaptran_vect +subroutine psi_iswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) + use psi_mod, psb_protect_name => psi_iswaptran_xchg_vect + use psb_error_mod + use psb_realloc_mod + use psb_desc_mod + use psb_penv_mod + use psb_i_base_vect_mod + use iso_fortran_env + implicit none + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_i_base_vect_type) :: y + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + + ! locals + integer(psb_mpik_) :: ictxt, icomm, np, me,& + & proc_to_comm, p2ptag, iret + integer(psb_ipk_) :: nesd, nerv,& + & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& + & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself + integer :: count + integer(psb_ipk_), allocatable, save :: buffer(:)[:], sndbuf(:) + type(event_type), allocatable, save :: ufg(:)[:] + type(event_type), allocatable, save :: clear[:] + integer, save :: last_clear_count = 0 + logical :: swap_mpi, swap_sync, swap_send, swap_recv,& + & albf,do_send,do_recv + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name + + info=psb_success_ + name='psi_tran_xchg_vect' + call psb_erractionsave(err_act) + ictxt = iictxt + icomm = iicomm + + call psb_info(ictxt,me,np) + if (np == -1) then + info=psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + + if (np /= num_images()) then + write(*,*) 'Something is wrong MPI vs CAF ', np, num_images() + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Num_images /= np') + goto 9999 + end if + + n=1 + + swap_mpi = iand(flag,psb_swap_mpi_) /= 0 + swap_sync = iand(flag,psb_swap_sync_) /= 0 + swap_send = iand(flag,psb_swap_send_) /= 0 + swap_recv = iand(flag,psb_swap_recv_) /= 0 + do_send = swap_mpi .or. swap_sync .or. swap_send + do_recv = swap_mpi .or. swap_sync .or. swap_recv + + if (.not.(do_send.and.do_recv)) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Unimplemented case in xchg_vect') + goto 9999 + end if + + if (.not.allocated(ufg)) then + !write(*,*) 'Allocating events',np + allocate(ufg(np)[*],stat=info) + if (info == 0) allocate(clear[*],stat=info) + if (info /= 0) then + + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray events allocation') + goto 9999 + end if + else + if (last_clear_count>0) & + & event wait(clear,until_count=last_clear_count) + end if + if (psb_size(buffer) < xchg%max_buffer_size) then + ! + ! By construction, max_buffer_size was computed with a collective. + ! + if (allocated(buffer)) deallocate(buffer) + !write(*,*) 'Allocating buffer',xchg%max_buffer_size + allocate(buffer(xchg%max_buffer_size)[*],stat=info) + if (allocated(sndbuf)) deallocate(sndbuf) + if (info == 0) allocate(sndbuf(xchg%max_buffer_size),stat=info) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Coarray buffer allocation') + goto 9999 + end if + end if + if (.false.) then + !sync all + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',p1,p2 + call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(p1:p2)) + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,'Remote boundaries: ',rp1,rp2 + call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(rp1:rp2)[img],beta) + event post(clear[img]) + + end do + last_clear_count = nxch + + else + + nxch = size(xchg%prcs_xch) + myself = this_image() + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_rcv_bnd(ip) + p2 = xchg%loc_rcv_bnd(ip+1)-1 + rp1 = xchg%rmt_snd_bnd(ip,1) + rp2 = xchg%rmt_snd_bnd(ip,2) + isz = p2-p1+1 + !write(0,*) myself,'Posting for ',img,' boundaries: ',rp1,rp2 + if (.false.) then + call y%gth(isz,xchg%loc_rcv_idx(p1:p2),buffer(rp1:rp2)[img]) + else + call y%gth(isz,xchg%loc_rcv_idx(p1:p2),sndbuf(p1:p2)) + buffer(rp1:rp2)[img] = sndbuf(p1:p2) + end if + end do + ! + ! Doing event post later should provide more opportunities for + ! overlap + ! + do ip= 1, nxch + img = xchg%prcs_xch(ip) + 1 + event post(ufg(myself)[img]) + end do + + do ip = 1, nxch + img = xchg%prcs_xch(ip) + 1 + event wait(ufg(img)) + img = xchg%prcs_xch(ip) + 1 + p1 = xchg%loc_snd_bnd(ip) + p2 = xchg%loc_snd_bnd(ip+1)-1 + isz = p2-p1+1 + rp1 = xchg%rmt_rcv_bnd(ip,1) + rp2 = xchg%rmt_rcv_bnd(ip,2) + !write(0,*) myself,'Getting from ',img,' boundaries: ',p1,p2 + call y%sct(isz,xchg%loc_snd_idx(p1:p2),buffer(p1:p2),beta) + event post(clear[img]) + end do + + last_clear_count = nxch + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ictxt,err_act) + + return +end subroutine psi_iswaptran_xchg_vect ! ! ! Subroutine: psi_itran_vidx_vect diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90 index b5421fa9..c303af82 100644 --- a/base/comm/internals/psi_sswapdata.F90 +++ b/base/comm/internals/psi_sswapdata.F90 @@ -89,6 +89,7 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -142,7 +143,7 @@ subroutine psi_sswapdatam(flag,n,beta,y,desc_a,work,info,data) goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -188,7 +189,6 @@ subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_sswap_xchg_m' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -755,6 +755,7 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -808,7 +809,7 @@ subroutine psi_sswapdatav(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -1374,6 +1375,7 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1428,7 +1430,7 @@ subroutine psi_sswapdata_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90 index 917e9147..3b127ef2 100644 --- a/base/comm/internals/psi_sswaptran.F90 +++ b/base/comm/internals/psi_sswaptran.F90 @@ -93,6 +93,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -147,7 +148,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -756,6 +757,7 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -809,7 +811,7 @@ subroutine psi_sswaptranv(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -855,7 +857,6 @@ subroutine psi_sswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswaptran_xchg_v' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -1393,6 +1394,7 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1447,7 +1449,7 @@ subroutine psi_sswaptran_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90 index 8c4da6a6..28bf0722 100644 --- a/base/comm/internals/psi_zswapdata.F90 +++ b/base/comm/internals/psi_zswapdata.F90 @@ -89,6 +89,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -142,7 +143,7 @@ subroutine psi_zswapdatam(flag,n,beta,y,desc_a,work,info,data) goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -188,7 +189,6 @@ subroutine psi_zswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_zswap_xchg_m' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -755,6 +755,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -808,7 +809,7 @@ subroutine psi_zswapdatav(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -1374,6 +1375,7 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1428,7 +1430,7 @@ subroutine psi_zswapdata_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swapdata(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swapdata(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90 index 103f5a3d..56c2f436 100644 --- a/base/comm/internals/psi_zswaptran.F90 +++ b/base/comm/internals/psi_zswaptran.F90 @@ -93,6 +93,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -147,7 +148,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,n,beta,y,d_xchg,info) @@ -756,6 +757,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -809,7 +811,7 @@ subroutine psi_zswaptranv(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_idx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) @@ -855,7 +857,6 @@ subroutine psi_zswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) integer(psb_ipk_) :: ierr(5) character(len=20) :: name - print*,' call psi_dswaptran_xchg_v' info=psb_success_ name='psi_swap_datav' call psb_erractionsave(err_act) @@ -1393,6 +1394,7 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) use psb_error_mod use psb_desc_mod use psb_penv_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1447,7 +1449,7 @@ subroutine psi_zswaptran_vect(flag,beta,y,desc_a,work,info,data) call psb_errpush(psb_err_internal_error_,name,a_err='psb_cd_get_list') goto 9999 end if - if (.false.) then + if (.not.(if_caf2)) then call psi_swaptran(ictxt,icomm,flag,beta,y,d_vidx,totxch,idxs,idxr,work,info) else call psi_swaptran(ictxt,icomm,flag,beta,y,d_xchg,info) diff --git a/base/internals/Makefile b/base/internals/Makefile index 1230aca1..37edd4d3 100644 --- a/base/internals/Makefile +++ b/base/internals/Makefile @@ -37,7 +37,7 @@ lib: mpfobjs $(FOBJS) $(FOBJS2) $(COBJS) $(MPFOBJS2) $(MPFOBJS) $(RANLIB) $(LIBDIR)/$(LIBNAME) $(FOBJS) $(FBOJS2): $(MODDIR)/psi_mod.o -mpfobjs: +mpfobjs: (make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)") (make $(FOBJS2) F90="$(MPF77)" FC="$(MPF77)" FCOPT="$(FCOPT)") clean: diff --git a/base/internals/psb_indx_map_fnd_owner.F90 b/base/internals/psb_indx_map_fnd_owner.F90 index 1731b732..582431bc 100644 --- a/base/internals/psb_indx_map_fnd_owner.F90 +++ b/base/internals/psb_indx_map_fnd_owner.F90 @@ -1,4 +1,4 @@ -!!$ +!ii!$ !!$ Parallel Sparse BLAS version 3.4 !!$ (C) Copyright 2006, 2010, 2015 !!$ Salvatore Filippone University of Rome Tor Vergata @@ -51,6 +51,7 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) use psb_error_mod use psb_penv_mod use psb_realloc_mod + use psb_caf_mod use psb_indx_map_mod, psb_protect_name => psb_indx_map_fnd_owner #ifdef MPI_MOD use mpi @@ -168,10 +169,13 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) if (gettime) then t3 = psb_wtime() end if - - call mpi_allgatherv(idx,hsz(me+1),psb_mpi_ipk_integer,& - & hproc,hsz,hidx,psb_mpi_ipk_integer,& - & icomm,minfo) + if (if_caf) then + call caf_allgatherv(idx, hsz(me+1), hproc, hsz, hidx, info) + else + call mpi_allgatherv(idx,hsz(me+1),psb_mpi_ipk_integer,& + & hproc,hsz,hidx,psb_mpi_ipk_integer,& + & icomm,minfo) + endif if (gettime) then tamx = psb_wtime() - t3 end if @@ -211,11 +215,15 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) if (gettime) then t3 = psb_wtime() end if + ! Collect all the answers with alltoallv (need sizes) - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) - + if (if_caf) then + call caf_alltoall(sdsz,rvsz, 1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif isz = sum(rvsz) allocate(answers(isz,2),idxsrch(nv,2),stat=info) @@ -228,9 +236,15 @@ subroutine psb_indx_map_fnd_owner(idx,iprc,idxmap,info) rvidx(ip) = j j = j + rvsz(ip) end do - call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_ipk_integer,& - & answers(:,1),rvsz,rvidx,psb_mpi_ipk_integer,& - & icomm,minfo) + print*,'---------hproc', hproc,'sdsz',sdsz,'sdidx',sdidx,'answers',answers(:,1), 'rvsz', rvsz,'rvidx', rvidx, this_image() + + if (if_caf) then + call caf_alltoallv(hproc, sdsz,sdidx, answers(:,1),rvsz,rvidx, minfo) + else + call mpi_alltoallv(hproc,sdsz,sdidx,psb_mpi_ipk_integer,& + & answers(:,1),rvsz,rvidx,psb_mpi_ipk_integer,& + & icomm,minfo) + endif if (gettime) then tamx = psb_wtime() - t3 + tamx end if diff --git a/base/internals/psi_desc_impl.f90 b/base/internals/psi_desc_impl.f90 index a172c6c9..800188a9 100644 --- a/base/internals/psi_desc_impl.f90 +++ b/base/internals/psi_desc_impl.f90 @@ -63,20 +63,23 @@ end subroutine psi_renum_index subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) use psi_mod, psi_protect_name => psi_cnv_v2xch use psb_realloc_mod + use psb_caf_mod implicit none integer(psb_ipk_), intent(in) :: ictxt, vidx_in(:) type(psb_xch_idx_type), intent(inout) :: xch_idx integer(psb_ipk_), intent(out) :: info ! ....local scalars.... - integer(psb_ipk_) :: np, me + integer(psb_ipk_) :: np, me, img integer(psb_ipk_) :: err_act integer(psb_ipk_) :: nxch, nsnd, nrcv, nesd,nerv, ip, j, k, ixch ! ...parameters integer(psb_ipk_) :: debug_level, debug_unit logical, parameter :: debug=.false. character(len=20) :: name - + integer(psb_ipk_), allocatable :: buf_rmt_rcv_bnd(:)[:], buf_rmt_snd_bnd(:)[:] + type(event_type), allocatable, save :: snd_done(:)[:] + type(event_type), save :: rcv_done[*] name='psi_cnv_v2xch' call psb_get_erraction(err_act) debug_level = psb_get_debug_level() @@ -85,6 +88,8 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) info = psb_success_ call psb_info(ictxt,me,np) + me = this_image() + np = num_images() if (np == -1) then info = psb_err_context_error_ call psb_errpush(info,name) @@ -114,6 +119,12 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) ixch = 1 xch_idx%loc_snd_bnd(1) = 1 xch_idx%loc_rcv_bnd(1) = 1 + if (if_caf) then + if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) + if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) + if (allocated(snd_done)) deallocate(snd_done) + allocate(buf_rmt_rcv_bnd(np*2)[*], buf_rmt_snd_bnd(np*2)[*], snd_done(np)[*]) + endif do if (ip > size(vidx_in)) then write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ' @@ -131,17 +142,32 @@ subroutine psi_cnv_v2xch(ictxt, vidx_in, xch_idx,info) & vidx_in(ip+nerv+psb_n_elem_send_+1:ip+nerv+psb_n_elem_send_+nesd) xch_idx%loc_rcv_bnd(ixch+1) = xch_idx%loc_rcv_bnd(ixch) + nerv xch_idx%loc_snd_bnd(ixch+1) = xch_idx%loc_snd_bnd(ixch) + nesd - call psb_snd(ictxt,xch_idx%loc_rcv_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch)) - call psb_snd(ictxt,xch_idx%loc_snd_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch)) - call psb_rcv(ictxt,xch_idx%rmt_rcv_bnd(ixch,1:2),xch_idx%prcs_xch(ixch)) - call psb_rcv(ictxt,xch_idx%rmt_snd_bnd(ixch,1:2),xch_idx%prcs_xch(ixch)) + img = xch_idx%prcs_xch(ixch) + 1 +!Here I am assuming that all the data exchange between two images takes place in one exchange + if (if_caf) then + buf_rmt_rcv_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_rcv_bnd(ixch:ixch+1) + buf_rmt_snd_bnd(me*2 - 1 : me*2)[img]= xch_idx%loc_snd_bnd(ixch:ixch+1) + event post(snd_done(me)[img]) + event wait(snd_done(img)) + xch_idx%rmt_rcv_bnd(ixch,1:2)=buf_rmt_rcv_bnd(img*2 - 1 : img*2) + xch_idx%rmt_snd_bnd(ixch,1:2)=buf_rmt_snd_bnd(img*2 - 1 : img*2) + else + call psb_snd(ictxt,xch_idx%loc_rcv_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch)) + call psb_snd(ictxt,xch_idx%loc_snd_bnd(ixch:ixch+1),xch_idx%prcs_xch(ixch)) + call psb_rcv(ictxt,xch_idx%rmt_rcv_bnd(ixch,1:2),xch_idx%prcs_xch(ixch)) + call psb_rcv(ictxt,xch_idx%rmt_snd_bnd(ixch,1:2),xch_idx%prcs_xch(ixch)) + endif ip = ip+nerv+nesd+3 ixch = ixch + 1 end do xch_idx%rmt_rcv_bnd(:,2) = xch_idx%rmt_rcv_bnd(:,2) - 1 xch_idx%rmt_snd_bnd(:,2) = xch_idx%rmt_snd_bnd(:,2) - 1 - - + if (if_caf) then + if (allocated(buf_rmt_rcv_bnd)) deallocate(buf_rmt_rcv_bnd) + if (allocated(buf_rmt_snd_bnd)) deallocate(buf_rmt_snd_bnd) + if (allocated(snd_done)) deallocate(snd_done) + sync all + endif call psb_erractionrestore(err_act) return diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 88b588fb..12d094b9 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -98,8 +98,11 @@ ! is rebuilt during the CDASB process (in the psi_ldsc_pre_halo subroutine). ! ! + + subroutine psi_desc_index(desc,index_in,dep_list,& & length_dl,nsnd,nrcv,desc_index,isglob_in,info) + use psb_caf_mod use psb_desc_mod use psb_realloc_mod use psb_error_mod @@ -165,6 +168,7 @@ subroutine psi_desc_index(desc,index_in,dep_list,& ! to be received/sent (in the final psblas descriptor). ! be careful of the inversion ! + allocate(sdsz(np),rvsz(np),bsdindx(np),brvindx(np),stat=info) if(info /= psb_success_) then info=psb_err_alloc_dealloc_ @@ -186,7 +190,14 @@ subroutine psi_desc_index(desc,index_in,dep_list,& i = i + nerv + 1 end do ihinsz=i - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo) + !call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo) + + if (if_caf) then + call caf_alltoall(sdsz, rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif + if (minfo /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoall') goto 9999 @@ -291,9 +302,13 @@ subroutine psi_desc_index(desc,index_in,dep_list,& brvindx(proc+1) = idxr idxr = idxr + rvsz(proc+1) end do + if (if_caf) then + call caf_alltoallv(sndbuf, sdsz, bsdindx, rcvbuf, rvsz, brvindx, minfo) + else + call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,& + & rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) - call mpi_alltoallv(sndbuf,sdsz,bsdindx,psb_mpi_ipk_integer,& - & rcvbuf,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + endif if (minfo /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mpi_alltoallv') goto 9999 diff --git a/base/internals/psi_extrct_dl.F90 b/base/internals/psi_extrct_dl.F90 index c189d01b..77abdca9 100644 --- a/base/internals/psi_extrct_dl.F90 +++ b/base/internals/psi_extrct_dl.F90 @@ -126,6 +126,7 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& use psb_const_mod use psb_error_mod use psb_desc_mod + use psb_caf_mod implicit none #ifdef MPI_H include 'mpif.h' @@ -272,8 +273,13 @@ subroutine psi_extract_dep_list(ictxt,is_bld,is_upd,desc_str,dep_list,& endif itmp(1:dl_lda) = dep_list(1:dl_lda,me) dl_mpi = dl_lda - call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,& - & dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo) + + if (if_caf) then + call caf_allgather(itmp,dl_mpi, dep_list, minfo) + else + call mpi_allgather(itmp,dl_mpi,psb_mpi_ipk_integer,& + & dep_list,dl_mpi,psb_mpi_ipk_integer,icomm,minfo) + endif info = minfo if (info == 0) deallocate(itmp,stat=info) if (info /= psb_success_) then diff --git a/base/modules/Makefile b/base/modules/Makefile index bd882423..6f77d437 100644 --- a/base/modules/Makefile +++ b/base/modules/Makefile @@ -1,6 +1,6 @@ include ../../Make.inc -BASIC_MODS= psb_const_mod.o psb_error_mod.o psb_realloc_mod.o +BASIC_MODS= psb_const_mod.o psb_error_mod.o psb_realloc_mod.o psb_caf_mod.o COMMINT=psi_comm_buffers_mod.o psi_penv_mod.o psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o UTIL_MODS = aux/psb_string_mod.o desc/psb_desc_const_mod.o desc/psb_indx_map_mod.o\ desc/psb_gen_block_map_mod.o desc/psb_list_map_mod.o desc/psb_repl_map_mod.o\ @@ -36,9 +36,8 @@ UTIL_MODS = aux/psb_string_mod.o desc/psb_desc_const_mod.o desc/psb_indx_map_mod serial/psb_c_base_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_c_csc_mat_mod.o serial/psb_c_mat_mod.o \ serial/psb_z_base_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_z_csc_mat_mod.o serial/psb_z_mat_mod.o - MODULES=$(BASIC_MODS) $(UTIL_MODS) -OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o +OBJS = error.o psb_base_mod.o $(EXTRA_COBJS) cutil.o LIBDIR=.. CINCLUDES=-I. FINCLUDES=$(FMFLAG)$(LIBDIR) $(FMFLAG). $(FIFLAG). @@ -57,7 +56,7 @@ psb_realloc_mod.o: psb_error_mod.o $(UTIL_MODS): $(BASIC_MODS) psi_penv_mod.o: psi_comm_buffers_mod.o -psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o: psi_penv_mod.o +psi_bcast_mod.o psi_reduce_mod.o psi_p2p_mod.o: psi_penv_mod.o psb_caf_mod.o aux/psb_string_mod.o desc/psb_desc_const_mod.o psi_comm_buffers_mod.o: psb_const_mod.o @@ -155,6 +154,10 @@ psblas/psb_s_psblas_mod.o psblas/psb_c_psblas_mod.o psblas/psb_d_psblas_mod.o ps psb_base_mod.o: $(MODULES) + +psb_caf_mod.o: psb_caf_mod.f90 psb_const_mod.o + $(F90) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@ + psi_penv_mod.o: psi_penv_mod.F90 $(BASIC_MODS) $(F90) $(FINCLUDES) $(FDEFINES) $(F90COPT) $(EXTRA_OPT) -c $< -o $@ diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index ded113d3..4ae898ca 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -707,7 +707,7 @@ contains do if (ip > size(idx)) then - write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ' + write(psb_err_unit,*) trim(name),': Warning: out of size of input vector ', this_image(),ip, size(idx) exit end if if (idx(ip) == -1) exit diff --git a/base/modules/psi_bcast_mod.F90 b/base/modules/psi_bcast_mod.F90 index 2bf0809d..665f15ee 100644 --- a/base/modules/psi_bcast_mod.F90 +++ b/base/modules/psi_bcast_mod.F90 @@ -70,6 +70,7 @@ contains subroutine psb_ibcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -79,8 +80,8 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_ipk_), intent(inout) :: dat + integer(psb_ipk_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root - integer(psb_mpik_) :: iam, np, root_, info #if !defined(SERIAL_MPI) @@ -90,12 +91,22 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_ipk_integer,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_ipk_integer,root_,ictxt,info) + endif #endif end subroutine psb_ibcasts subroutine psb_ibcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -105,6 +116,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_ipk_), intent(inout) :: dat(:) + integer(psb_ipk_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -114,13 +126,23 @@ contains else root_ = psb_root_ endif - - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info) + endif #endif end subroutine psb_ibcastv subroutine psb_ibcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -130,6 +152,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_ipk_), intent(inout) :: dat(:,:) + integer(psb_ipk_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -140,14 +163,24 @@ contains else root_ = psb_root_ endif - - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_ipk_integer,root_,ictxt,info) + endif #endif end subroutine psb_ibcastm subroutine psb_sbcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -157,6 +190,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_spk_), intent(inout) :: dat + real(psb_spk_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -168,13 +202,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_r_spk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_r_spk_,root_,ictxt,info) + endif #endif end subroutine psb_sbcasts subroutine psb_sbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -184,6 +228,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_spk_), intent(inout) :: dat(:) + real(psb_spk_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -195,13 +240,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info) - + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info) + endif #endif end subroutine psb_sbcastv subroutine psb_sbcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -211,6 +266,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_spk_), intent(inout) :: dat(:,:) + real(psb_spk_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -222,14 +278,24 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info) - + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_r_spk_,root_,ictxt,info) + endif #endif end subroutine psb_sbcastm subroutine psb_dbcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -239,6 +305,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_dpk_), intent(inout) :: dat + real(psb_dpk_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -250,13 +317,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_dbcasts subroutine psb_dbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -266,6 +343,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_dpk_), intent(inout) :: dat(:) + real(psb_dpk_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -277,12 +355,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_dbcastv subroutine psb_dbcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -292,6 +381,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt real(psb_dpk_), intent(inout) :: dat(:,:) + real(psb_dpk_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -303,12 +393,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_r_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_dbcastm subroutine psb_cbcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -318,10 +419,10 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_spk_), intent(inout) :: dat + complex(psb_spk_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info - #if !defined(SERIAL_MPI) if (present(root)) then root_ = root @@ -329,12 +430,22 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_c_spk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_r_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_cbcasts subroutine psb_cbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -344,6 +455,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_spk_), intent(inout) :: dat(:) + complex(psb_spk_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -355,12 +467,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info) + endif #endif end subroutine psb_cbcastv subroutine psb_cbcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -370,6 +493,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_spk_), intent(inout) :: dat(:,:) + complex(psb_spk_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -381,12 +505,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_c_spk_,root_,ictxt,info) + endif #endif end subroutine psb_cbcastm subroutine psb_zbcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -396,6 +531,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_dpk_), intent(inout) :: dat + complex(psb_dpk_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -407,12 +543,25 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_c_dpk_,root_,ictxt,info) + if (if_caf2) then + + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + !print*,'****',this_image(), dat_buf, dat + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + !print*,'***** after',this_image(), dat + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_c_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_zbcasts subroutine psb_zbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -422,6 +571,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_dpk_), intent(inout) :: dat(:) + complex(psb_dpk_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -433,12 +583,23 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_zbcastv subroutine psb_zbcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -448,6 +609,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt complex(psb_dpk_), intent(inout) :: dat(:,:) + complex(psb_dpk_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -459,13 +621,24 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_c_dpk_,root_,ictxt,info) + endif #endif end subroutine psb_zbcastm subroutine psb_hbcasts(ictxt,dat,root,length) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -492,13 +665,13 @@ contains endif call psb_info(ictxt,iam,np) - call mpi_bcast(dat,length_,MPI_CHARACTER,root_,ictxt,info) #endif end subroutine psb_hbcasts subroutine psb_hbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -522,13 +695,13 @@ contains size_ = size(dat) call psb_info(ictxt,iam,np) - call mpi_bcast(dat,length_*size_,MPI_CHARACTER,root_,ictxt,info) #endif end subroutine psb_hbcastv subroutine psb_lbcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -538,6 +711,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt logical, intent(inout) :: dat + logical, allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_,info @@ -549,14 +723,24 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,MPI_LOGICAL,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,MPI_LOGICAL,root_,ictxt,info) + endif #endif end subroutine psb_lbcasts subroutine psb_lbcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -566,6 +750,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt logical, intent(inout) :: dat(:) + logical, allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_,info @@ -577,16 +762,25 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),MPI_LOGICAL,root_,ictxt,info) + if (if_caf2) then + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),MPI_LOGICAL,root_,ictxt,info) + endif #endif end subroutine psb_lbcastv - #if !defined(LONG_INTEGERS) subroutine psb_i8bcasts(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -596,6 +790,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_long_int_k_), intent(inout) :: dat + integer(psb_long_int_k_), allocatable :: dat_buf[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -607,12 +802,22 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,1,psb_mpi_lng_integer,root_,ictxt,info) + if (if_caf2) then + if (allocated(dat_buf)) deallocate(dat_buf) + allocate(dat_buf[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,1,psb_mpi_lng_integer,root_,ictxt,info) + endif #endif end subroutine psb_i8bcasts subroutine psb_i8bcastv(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -622,6 +827,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_long_int_k_), intent(inout) :: dat(:) + integer(psb_long_int_k_), allocatable :: dat_buf(:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -632,12 +838,22 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info) + if (if_caf2) then + allocate(dat_buf(size(dat))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info) + endif #endif end subroutine psb_i8bcastv subroutine psb_i8bcastm(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -647,6 +863,7 @@ contains #endif integer(psb_mpik_), intent(in) :: ictxt integer(psb_long_int_k_), intent(inout) :: dat(:,:) + integer(psb_long_int_k_), allocatable :: dat_buf(:,:)[:] integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: iam, np, root_, info @@ -658,8 +875,17 @@ contains root_ = psb_root_ endif - call psb_info(ictxt,iam,np) - call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info) + if (if_caf2) then + allocate(dat_buf(size(dat,1),size(dat,2))[*]) + dat_buf=dat + sync all + call co_broadcast(dat_buf,root_ + 1) + dat = dat_buf + if (allocated(dat_buf)) deallocate(dat_buf) + else + call psb_info(ictxt,iam,np) + call mpi_bcast(dat,size(dat),psb_mpi_lng_integer,root_,ictxt,info) + endif #endif end subroutine psb_i8bcastm diff --git a/base/modules/psi_i_mod.f90 b/base/modules/psi_i_mod.f90 index 1688d824..3bbf44fb 100644 --- a/base/modules/psi_i_mod.f90 +++ b/base/modules/psi_i_mod.f90 @@ -244,6 +244,14 @@ module psi_i_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_iswapdata_multivect + subroutine psi_iswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:,:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswap_xchg_m subroutine psi_iswapidxm(ictxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import @@ -253,6 +261,14 @@ module psi_i_mod integer(psb_ipk_),target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv end subroutine psi_iswapidxm + subroutine psi_iswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswap_xchg_v subroutine psi_iswapidxv(ictxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import @@ -262,6 +278,14 @@ module psi_i_mod integer(psb_ipk_),target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv end subroutine psi_iswapidxv + subroutine psi_iswap_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_i_base_vect_type) :: y + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswap_xchg_vect subroutine psi_iswap_vidx_vect(iictxt,iicomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import @@ -326,6 +350,14 @@ module psi_i_mod type(psb_desc_type), target :: desc_a integer(psb_ipk_), optional :: data end subroutine psi_iswaptran_multivect + subroutine psi_iswaptran_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:,:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswaptran_xchg_m subroutine psi_itranidxm(ictxt,icomm,flag,n,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import @@ -335,6 +367,14 @@ module psi_i_mod integer(psb_ipk_),target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv end subroutine psi_itranidxm + subroutine psi_iswaptran_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: y(:) + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswaptran_xchg_v subroutine psi_itranidxv(ictxt,icomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import @@ -344,6 +384,14 @@ module psi_i_mod integer(psb_ipk_),target :: work(:) integer(psb_ipk_), intent(in) :: idx(:),totxch,totsnd,totrcv end subroutine psi_itranidxv + subroutine psi_iswaptran_xchg_vect(iictxt,iicomm,flag,beta,y,xchg,info) + import + integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag + integer(psb_ipk_), intent(out) :: info + class(psb_i_base_vect_type) :: y + integer(psb_ipk_) :: beta + class(psb_xch_idx_type), intent(inout) :: xchg + end subroutine psi_iswaptran_xchg_vect subroutine psi_itran_vidx_vect(iictxt,iicomm,flag,beta,y,idx,& & totxch,totsnd,totrcv,work,info) import diff --git a/base/modules/psi_reduce_mod.F90 b/base/modules/psi_reduce_mod.F90 index dc4fd89b..e7208c9f 100644 --- a/base/modules/psi_reduce_mod.F90 +++ b/base/modules/psi_reduce_mod.F90 @@ -178,7 +178,7 @@ module psi_reduce_mod interface psb_nrm2 module procedure psb_s_nrm2s_ic, psb_s_nrm2v_ic,& - & psb_d_nrm2s_ic, psb_d_nrm2v_ic + & psb_d_nrm2s_ic end interface #endif @@ -199,6 +199,7 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_imaxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -210,31 +211,49 @@ contains integer(psb_ipk_), intent(inout) :: dat integer(psb_mpik_), intent(in), optional :: root integer(psb_ipk_) :: dat_ + integer(psb_ipk_), allocatable :: co_dat_[:] integer(psb_mpik_) :: root_ integer(psb_mpik_) :: iam, np, info - integer(psb_ipk_) :: iinfo + integer(psb_ipk_) :: iinfo, me #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_max,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_max,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_imaxs subroutine psb_imaxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -246,43 +265,61 @@ contains integer(psb_ipk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:) + integer(psb_ipk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info - integer(psb_ipk_) :: iinfo + integer(psb_ipk_) :: iinfo, me #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_max,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_max,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_imaxv subroutine psb_imaxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif - implicit none + implicit none #ifdef MPI_H include 'mpif.h' #endif @@ -290,39 +327,57 @@ contains integer(psb_ipk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:,:) + integer(psb_ipk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info - integer(psb_ipk_) :: iinfo + integer(psb_ipk_) :: iinfo, me #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_max,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_imaxm #if defined(LONG_INTEGERS) subroutine psb_i4maxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -335,7 +390,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_mpik_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_mpid_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo @@ -347,18 +403,35 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_max,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_max,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_max,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_max,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_i4maxs subroutine psb_i4maxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -370,8 +443,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) @@ -383,26 +456,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_max,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4maxv subroutine psb_i4maxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -414,8 +505,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:,:), co_dat_(:,:)[*] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo @@ -427,20 +518,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_max,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4maxm @@ -450,6 +558,7 @@ contains #if !defined(LONG_INTEGERS) subroutine psb_i8maxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -462,30 +571,41 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_long_int_k_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) - - if (present(root)) then - root_ = root - else - root_ = -1 - endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_max,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_max,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_max,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_i8maxs subroutine psb_i8maxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -497,10 +617,9 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo - #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) @@ -510,26 +629,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_max,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8maxv subroutine psb_i8maxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -541,8 +678,8 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -554,20 +691,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_max,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8maxm @@ -576,6 +730,7 @@ contains subroutine psb_smaxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -588,29 +743,47 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_max,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_max,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_max,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_smaxs subroutine psb_smaxv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -623,39 +796,58 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) + call psb_info(ictxt,iam,np) + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) - end if + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_max,ictxt,info) + else + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_smaxv subroutine psb_smaxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -667,38 +859,55 @@ contains real(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_spk_,mpi_max,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_smaxm subroutine psb_dmaxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -711,30 +920,48 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_max,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_max,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_dmaxs subroutine psb_dmaxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -746,40 +973,59 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) + call psb_info(ictxt,iam,np) + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_max,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + end if + endif endif #endif + end subroutine psb_dmaxv subroutine psb_dmaxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -791,33 +1037,49 @@ contains real(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_max,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_max,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_max,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_dmaxm @@ -830,6 +1092,7 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_imins(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -841,30 +1104,48 @@ contains integer(psb_ipk_), intent(inout) :: dat integer(psb_mpik_), intent(in), optional :: root integer(psb_ipk_) :: dat_ + integer(psb_ipk_), allocatable :: co_dat_[:] integer(psb_mpik_) :: root_ - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_min,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_min,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_imins subroutine psb_iminv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -877,39 +1158,57 @@ contains integer(psb_ipk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_min,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_min,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iminv subroutine psb_iminm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -921,39 +1220,57 @@ contains integer(psb_ipk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_min,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_min,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iminm #if defined(LONG_INTEGERS) subroutine psb_i4mins(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -966,29 +1283,47 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_mpik_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_min,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_min,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_min,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_min,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_i4mins subroutine psb_i4minv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1001,38 +1336,56 @@ contains integer(psb_mpik_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_min,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_min,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4minv subroutine psb_i4minm(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1045,33 +1398,50 @@ contains integer(psb_mpik_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_min,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_min,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4minm @@ -1081,6 +1451,7 @@ contains #if !defined(LONG_INTEGERS) subroutine psb_i8mins(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1093,29 +1464,47 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_long_int_k_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_min,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_min,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_min,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_i8mins subroutine psb_i8minv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1128,38 +1517,56 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_min,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_min,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8minv subroutine psb_i8minm(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1172,33 +1579,50 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_min,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_min,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8minm @@ -1208,6 +1632,7 @@ contains subroutine psb_smins(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1220,30 +1645,48 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_min,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_min,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_min,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_smins subroutine psb_sminv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1255,39 +1698,58 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) + call psb_info(ictxt,iam,np) + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) - end if + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,ictxt,info) + else + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_sminv subroutine psb_sminm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1299,38 +1761,55 @@ contains real(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_spk_,mpi_min,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_sminm subroutine psb_dmins(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1343,30 +1822,48 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_min,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_min,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_dmins subroutine psb_dminv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1378,40 +1875,58 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) + call psb_info(ictxt,iam,np) + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_min,ictxt,info) - else - if (iam == root_) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call psb_realloc(size(dat,1),dat_,iinfo) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_dminv subroutine psb_dminm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1423,33 +1938,49 @@ contains real(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) - + me = this_image() if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_min,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + dat_=dat + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_r_dpk_,mpi_min,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_min,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_dminm @@ -1463,7 +1994,7 @@ contains subroutine psb_iamxs(ictxt,dat,root) - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1476,8 +2007,9 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_ipk_) :: dat_ + integer(psb_ipk_), allocatable :: co_dat_[:] integer(psb_mpik_) :: iam, np, info - integer(psb_ipk_) :: iinfo + integer(psb_ipk_) :: iinfo, me #if !defined(SERIAL_MPI) @@ -1488,18 +2020,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ - endif + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_, caf_iamx) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_iamx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif + endif #endif end subroutine psb_iamxs subroutine psb_iamxv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1512,8 +2062,8 @@ contains integer(psb_ipk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -1525,26 +2075,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_iamx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_iamx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iamxv subroutine psb_iamxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1556,8 +2125,8 @@ contains integer(psb_ipk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -1569,20 +2138,38 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_iamx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_iamx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_iamx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iamxm @@ -1591,6 +2178,7 @@ contains #if defined(LONG_INTEGERS) subroutine psb_i4amxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1603,6 +2191,7 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_mpik_) :: dat_ + integer(psb_mpik_), allocatable :: co_dat_[:] integer(psb_mpik_) :: iam, np, info integer(psb_mpik_) :: iinfo @@ -1615,19 +2204,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ - endif + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=abs(dat) + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif + endif #endif end subroutine psb_i4amxs subroutine psb_i4amxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1639,8 +2245,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) @@ -1652,26 +2258,51 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1)[*], STAT=iinfo) + call co_max(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4amxv subroutine psb_i4amxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1683,8 +2314,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo @@ -1696,20 +2327,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1,1)[*], STAT=iinfo) + call co_max(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_i4amx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4amxm @@ -1719,6 +2374,7 @@ contains #if !defined(LONG_INTEGERS) subroutine psb_i8amxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1731,7 +2387,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_long_int_k_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -1743,18 +2400,35 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ - endif + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=abs(dat) + call co_max(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif + endif #endif end subroutine psb_i8amxs subroutine psb_i8amxv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -1767,8 +2441,8 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -1780,26 +2454,51 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1)[*], STAT=iinfo) + call co_max(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8amxv subroutine psb_i8amxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1811,8 +2510,8 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -1824,20 +2523,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_max(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_max(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1,1)[*], STAT=iinfo) + call co_max(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_i8amx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8amxm @@ -1847,6 +2570,7 @@ contains subroutine psb_samxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1859,11 +2583,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -1871,6 +2597,23 @@ contains else root_ = -1 endif + + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_, caf_samx) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_samx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else if (root_ == -1) then call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samx_op,ictxt,info) dat = dat_ @@ -1878,11 +2621,13 @@ contains call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) if (iam == root_) dat = dat_ endif + endif #endif end subroutine psb_samxs subroutine psb_samxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1894,12 +2639,13 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -1907,26 +2653,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_samx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_samx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_samxv subroutine psb_samxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1938,12 +2703,11 @@ contains real(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) if (present(root)) then @@ -1951,25 +2715,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_samx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_samx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_samxm subroutine psb_damxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -1982,11 +2765,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -1994,18 +2779,38 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + sync all + call co_reduce(co_dat_, caf_damx) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_damx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_damxs subroutine psb_damxv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2017,12 +2822,13 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -2030,27 +2836,46 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_damx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_damx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_damx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_damx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_damxv subroutine psb_damxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2062,12 +2887,11 @@ contains real(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) if (present(root)) then @@ -2075,26 +2899,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_damx) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_damx, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_damxm subroutine psb_camxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2107,11 +2950,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -2119,17 +2964,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call caf_amx_reduce(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_camxs subroutine psb_camxv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -2143,11 +3007,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -2155,25 +3021,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amx_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_camxv subroutine psb_camxm(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -2187,11 +3072,11 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) if (present(root)) then @@ -2199,25 +3084,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amx_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_camxm subroutine psb_zamxs(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2230,11 +3134,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -2242,17 +3148,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamx_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call caf_amx_reduce(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamx_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_zamxs subroutine psb_zamxv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -2266,11 +3191,13 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) + call psb_info(ictxt,iam,np) if (present(root)) then @@ -2278,27 +3205,46 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& - & mpi_zamx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amx_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& + & mpi_zamx_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zamxv subroutine psb_zamxm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2311,11 +3257,11 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) - call psb_info(ictxt,iam,np) if (present(root)) then @@ -2323,26 +3269,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amx_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call caf_amx_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamx_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zamxm + ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! AMN: minimum absolute value @@ -2350,7 +3315,8 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_iamns(ictxt,dat,root) - + + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2363,7 +3329,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_ipk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2375,19 +3342,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ - endif + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_, caf_iamn) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_iamn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif + endif #endif end subroutine psb_iamns subroutine psb_iamnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2399,8 +3384,8 @@ contains integer(psb_ipk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2412,26 +3397,46 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) - else - if (iam == root_) then + + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_iamn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_iamn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) - else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) - end if + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) + else + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iamnv subroutine psb_iamnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2443,8 +3448,8 @@ contains integer(psb_ipk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2456,20 +3461,38 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_iamn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_iamn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_iamn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_iamn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_iamnm @@ -2478,6 +3501,7 @@ contains #if defined(LONG_INTEGERS) subroutine psb_i4amns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2490,7 +3514,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_mpik_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) @@ -2502,12 +3527,26 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + + if (if_caf) then + if (root_ == -1) then + co_dat_=abs(dat) + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,,mpi_i4amn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,,mpi_i4amn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif @@ -2515,6 +3554,7 @@ contains subroutine psb_i4amnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2526,8 +3566,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo #if !defined(SERIAL_MPI) @@ -2539,26 +3579,51 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_i4amn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1)[*], STAT=iinfo) + call co_min(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_i4amn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4amnv subroutine psb_i4amnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2570,8 +3635,8 @@ contains integer(psb_mpik_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_mpik_) :: iinfo @@ -2583,20 +3648,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_i4amn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1,1)[*], STAT=iinfo) + call co_min(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_i4amn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_i4amn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4amnm @@ -2606,6 +3695,7 @@ contains #if !defined(LONG_INTEGERS) subroutine psb_i8amns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2618,7 +3708,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_long_int_k_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), save :: co_dat_[*] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2630,19 +3721,32 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf) then + if (root_ == -1) then + co_dat_=abs(dat) + call co_min(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif - #endif end subroutine psb_i8amns subroutine psb_i8amnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2654,8 +3758,8 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2667,26 +3771,51 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1)[*], STAT=iinfo) + call co_min(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8amnv subroutine psb_i8amnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2698,8 +3827,8 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2711,20 +3840,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + if (iinfo == psb_success_) call co_min(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (iam == root_) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=abs(dat) + call co_min(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(1,1)[*], STAT=iinfo) + call co_min(co_dat_, result_image=root_+1) + dat_=co_dat_ + end if + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_i8amn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_i8amn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8amnm @@ -2734,6 +3887,7 @@ contains subroutine psb_samns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2746,7 +3900,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2758,18 +3913,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_, caf_samn) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_samn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_samns subroutine psb_samnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2781,8 +3954,8 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2794,26 +3967,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_samn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_samn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_samnv subroutine psb_samnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2825,8 +4017,8 @@ contains real(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2838,25 +4030,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,ictxt,info) - else - if (iam == root_) then + + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_samn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_samn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_samn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_samnm subroutine psb_damns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2869,7 +4081,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2881,18 +4094,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_, caf_damn) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_, caf_damn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_damns subroutine psb_damnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2904,8 +4136,8 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -2917,27 +4149,46 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_damn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_damn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_damn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_damn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_damnv subroutine psb_damnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2949,8 +4200,8 @@ contains real(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -2962,26 +4213,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_damn) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_,caf_damn, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_damn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_damnm subroutine psb_camns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -2994,7 +4264,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3006,18 +4277,38 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + sync all + call caf_camn_reduces(co_dat_) + sync all + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call caf_camn_reduces(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_camns subroutine psb_camnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3030,7 +4321,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3042,26 +4334,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amn_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call caf_amn_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_camnv subroutine psb_camnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3074,7 +4385,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -3086,25 +4398,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amn_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call caf_amn_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_camn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_camnm subroutine psb_zamns(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3117,7 +4448,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3129,18 +4461,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamn_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + me = this_image() + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call caf_amn_reduce(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call caf_amn_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamn_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_zamns subroutine psb_zamnv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3153,7 +4503,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3165,27 +4516,46 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& - & mpi_zamn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amn_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call caf_amn_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& + & mpi_zamn_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zamnv subroutine psb_zamnm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3198,7 +4568,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_), allocatable :: dat_(:,:) - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_(:,:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -3210,25 +4581,41 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call caf_amn_reduce(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call caf_amn_reduce(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) - else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) - end if + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,ictxt,info) + else + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_zamn_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zamnm - - ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! SUM @@ -3236,7 +4623,7 @@ contains ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine psb_isums(ictxt,dat,root) - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3249,7 +4636,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_ipk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_ipk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -3261,18 +4649,34 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif - #endif end subroutine psb_isums subroutine psb_isumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3285,7 +4689,7 @@ contains integer(psb_ipk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:) + integer(psb_ipk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3298,26 +4702,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_isumv subroutine psb_isumm(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3330,7 +4752,7 @@ contains integer(psb_ipk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_ipk_), allocatable :: dat_(:,:) + integer(psb_ipk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3343,20 +4765,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_ipk_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_ipk_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_ipk_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_isumm @@ -3426,19 +4865,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer2,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer2,mpi_sum,ictxt,info) else - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + else + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i2sumv @@ -3469,19 +4925,36 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer2,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer2,mpi_sum,ictxt,info) else - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + else + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer2,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i2summ @@ -3492,6 +4965,7 @@ contains #if defined(LONG_INTEGERS) subroutine psb_i4sums(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3504,7 +4978,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_mpik_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_mpik_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -3516,18 +4991,35 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_def_integer,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_i4sums subroutine psb_i4sumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3540,7 +5032,7 @@ contains integer(psb_mpik_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:) + integer(psb_mpik_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info #if !defined(SERIAL_MPI) @@ -3552,25 +5044,43 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,info) - dat_=dat - if (info == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,info) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + if (info == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,info) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,info) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,info) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4sumv subroutine psb_i4summ(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3583,7 +5093,7 @@ contains integer(psb_mpik_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_mpik_), allocatable :: dat_(:,:) + integer(psb_mpik_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info #if !defined(SERIAL_MPI) @@ -3594,20 +5104,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,info) - dat_=dat - if (info == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_def_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,info) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + if (info == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_def_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,info) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,info) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,info) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_def_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i4summ @@ -3616,7 +5143,7 @@ contains #if !defined(LONG_INTEGERS) subroutine psb_i8sums(ictxt,dat,root) - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3629,7 +5156,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ integer(psb_long_int_k_) :: dat_ - integer(psb_mpik_) :: iam, np, info + integer(psb_long_int_k_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) @@ -3641,18 +5169,34 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_lng_integer,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif - #endif end subroutine psb_i8sums subroutine psb_i8sumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3665,7 +5209,7 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:) + integer(psb_long_int_k_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3678,25 +5222,43 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8sumv subroutine psb_i8summ(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3709,7 +5271,7 @@ contains integer(psb_long_int_k_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - integer(psb_long_int_k_), allocatable :: dat_(:,:) + integer(psb_long_int_k_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3722,20 +5284,37 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_=dat - if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& - & psb_mpi_lng_integer,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_=dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) call mpi_allreduce(dat_,dat,size(dat),& + & psb_mpi_lng_integer,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_=dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_lng_integer,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_i8summ @@ -3745,6 +5324,7 @@ contains subroutine psb_ssums(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3757,7 +5337,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3769,17 +5350,34 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_ssums subroutine psb_ssumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3792,7 +5390,7 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3805,25 +5403,43 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_ssumv subroutine psb_ssumm(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -3836,7 +5452,7 @@ contains real(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:,:) + real(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3849,25 +5465,43 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_ssumm subroutine psb_dsums(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3880,7 +5514,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -3892,18 +5527,35 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_dsums subroutine psb_dsumv(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3915,7 +5567,7 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3928,27 +5580,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_dsumv subroutine psb_dsumm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -3960,7 +5630,7 @@ contains real(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:,:) + real(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -3973,26 +5643,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_dsumm subroutine psb_csums(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4005,7 +5693,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -4017,17 +5706,34 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_spk_,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_csums subroutine psb_csumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -4040,7 +5746,7 @@ contains complex(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - complex(psb_spk_), allocatable :: dat_(:) + complex(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -4053,26 +5759,44 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_csumv subroutine psb_csumm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4084,7 +5808,7 @@ contains complex(psb_spk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - complex(psb_spk_), allocatable :: dat_(:,:) + complex(psb_spk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -4097,25 +5821,43 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_spk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_csumm subroutine psb_zsums(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4128,7 +5870,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ complex(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + complex(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -4140,17 +5883,34 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_sum,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf2) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_sum(co_dat_) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_sum,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_zsums subroutine psb_zsumv(ictxt,dat,root) + use psb_caf_mod use psb_realloc_mod #ifdef MPI_MOD use mpi @@ -4163,7 +5923,7 @@ contains complex(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - complex(psb_dpk_), allocatable :: dat_(:) + complex(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -4176,27 +5936,45 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& - & mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,& + & mpi_sum,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zsumv subroutine psb_zsumm(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4208,7 +5986,7 @@ contains complex(psb_dpk_), intent(inout) :: dat(:,:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - complex(psb_dpk_), allocatable :: dat_(:,:) + complex(psb_dpk_), allocatable :: dat_(:,:), co_dat_(:,:)[:] integer(psb_mpik_) :: iam, np, info integer(psb_ipk_) :: iinfo @@ -4221,30 +5999,49 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_)& - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,ictxt,info) - else - if (iam == root_) then + if (if_caf2) then + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_sum(co_dat_) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1),size(dat,2))[*], STAT=iinfo) + co_dat_=dat + call co_sum(co_dat_, result_image=root_+1) + dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + if (iinfo == psb_success_)& + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,ictxt,info) else - call psb_realloc(1,1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat,1),size(dat,2),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + else + call psb_realloc(1,1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_c_dpk_,mpi_sum,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_zsumm + ! !!!!!!!!!!!! ! ! Norm 2 ! ! !!!!!!!!!!!! subroutine psb_s_nrm2s(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4257,7 +6054,8 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_spk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -4269,17 +6067,48 @@ contains else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_snrm2_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_snrm2_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_,caf_snrm2) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_,caf_snrm2, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_spk_,mpi_snrm2_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_spk_,mpi_snrm2_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_s_nrm2s + pure real(psb_dpk_) function caf_inrm2b(vin, vinout) + implicit none + real(psb_dpk_), intent(in) :: vin + real(psb_dpk_), intent(in) :: vinout + integer(psb_ipk_) :: i + real(psb_dpk_) :: w, z + w = max( vin, vinout ) + z = min( vin, vinout ) + if ( z == dzero ) then + caf_inrm2b = w + else + caf_inrm2b = w*sqrt( done +( z / w )**2 ) + end if + end function caf_inrm2b + subroutine psb_d_nrm2s(ictxt,dat,root) + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4292,30 +6121,46 @@ contains integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ real(psb_dpk_) :: dat_ - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: co_dat_[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo #if !defined(SERIAL_MPI) call psb_info(ictxt,iam,np) - if (present(root)) then root_ = root else root_ = -1 endif - if (root_ == -1) then - call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_dnrm2_op,ictxt,info) - dat = dat_ - else - call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_dnrm2_op,root_,ictxt,info) - if (iam == root_) dat = dat_ + if (if_caf) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate(co_dat_[*]) + if (root_ == -1) then + co_dat_=dat + call co_reduce(co_dat_,caf_dnrm2) + dat=co_dat_ + else + me = this_image() + co_dat_=dat + call co_reduce(co_dat_,caf_dnrm2, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + else + if (root_ == -1) then + call mpi_allreduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_dnrm2_op,ictxt,info) + dat = dat_ + else + call mpi_reduce(dat,dat_,1,psb_mpi_r_dpk_,mpi_dnrm2_op,root_,ictxt,info) + if (iam == root_) dat = dat_ + endif endif #endif end subroutine psb_d_nrm2s subroutine psb_s_nrm2v(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4327,8 +6172,8 @@ contains real(psb_spk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_spk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_spk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -4340,29 +6185,48 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,& - & mpi_snrm2_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_snrm2) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_snrm2, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,& - & mpi_snrm2_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_spk_,& + & mpi_snrm2_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,& - & mpi_snrm2_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_spk_,& + & mpi_snrm2_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_spk_,& + & mpi_snrm2_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_s_nrm2v subroutine psb_d_nrm2v(ictxt,dat,root) use psb_realloc_mod + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -4374,8 +6238,8 @@ contains real(psb_dpk_), intent(inout) :: dat(:) integer(psb_mpik_), intent(in), optional :: root integer(psb_mpik_) :: root_ - real(psb_dpk_), allocatable :: dat_(:) - integer(psb_mpik_) :: iam, np, info + real(psb_dpk_), allocatable :: dat_(:), co_dat_(:)[:] + integer(psb_mpik_) :: iam, np, info, me integer(psb_ipk_) :: iinfo @@ -4387,23 +6251,41 @@ contains else root_ = -1 endif - if (root_ == -1) then - call psb_realloc(size(dat),dat_,iinfo) - dat_ = dat - if (iinfo == psb_success_) & - & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_dnrm2_op,ictxt,info) - else - if (iam == root_) then + if (if_caf) then + me = this_image() + if (root_ == -1) then + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + if (iinfo == psb_success_) call co_reduce(co_dat_,caf_dnrm2) + if (iinfo == psb_success_) dat=co_dat_ + else + if (allocated(co_dat_)) deallocate(co_dat_) + allocate (co_dat_(size(dat,1))[*], STAT=iinfo) + co_dat_=dat + call co_reduce(co_dat_, caf_dnrm2, result_image=root_+1) + if (me == root_ + 1) dat=co_dat_ + endif + if (allocated(co_dat_)) deallocate(co_dat_) + else + if (root_ == -1) then call psb_realloc(size(dat),dat_,iinfo) dat_ = dat - call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& - & mpi_dnrm2_op,root_,ictxt,info) + if (iinfo == psb_success_) & + & call mpi_allreduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_dnrm2_op,ictxt,info) else - call psb_realloc(1,dat_,iinfo) - call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,& - & mpi_dnrm2_op,root_,ictxt,info) - end if + if (iam == root_) then + call psb_realloc(size(dat),dat_,iinfo) + dat_ = dat + call mpi_reduce(dat_,dat,size(dat),psb_mpi_r_dpk_,& + & mpi_dnrm2_op,root_,ictxt,info) + else + call psb_realloc(1,dat_,iinfo) + call mpi_reduce(dat,dat_,size(dat),psb_mpi_r_dpk_,& + & mpi_dnrm2_op,root_,ictxt,info) + end if + endif endif #endif end subroutine psb_d_nrm2v diff --git a/base/tools/psb_ccdbldext.F90 b/base/tools/psb_ccdbldext.F90 index fc2652ff..d31b433d 100644 --- a/base/tools/psb_ccdbldext.F90 +++ b/base/tools/psb_ccdbldext.F90 @@ -63,7 +63,7 @@ Subroutine psb_ccdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_ccdbldext use psi_mod - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -467,9 +467,13 @@ Subroutine psb_ccdbldext(a,desc_a,novr,desc_ov,info, extype) ! Exchange data requests with everybody else: so far we have ! accumulated RECV requests, we have an all-to-all to build ! matchings SENDs. - ! - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & - & psb_mpi_def_integer,icomm,minfo) + ! + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & + & psb_mpi_def_integer,icomm,minfo) + endif if (minfo /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='mpi_alltoall') diff --git a/base/tools/psb_cdalv.f90 b/base/tools/psb_cdalv.f90 index 41a9b122..362221e3 100644 --- a/base/tools/psb_cdalv.f90 +++ b/base/tools/psb_cdalv.f90 @@ -1,4 +1,5 @@ -!!$ +!se/tools/psb_zallc.f90 +!$ !!$ Parallel Sparse BLAS version 3.4 !!$ (C) Copyright 2006, 2010, 2015 !!$ Salvatore Filippone University of Rome Tor Vergata diff --git a/base/tools/psb_csphalo.F90 b/base/tools/psb_csphalo.F90 index 979cd1b1..923b3ba2 100644 --- a/base/tools/psb_csphalo.F90 +++ b/base/tools/psb_csphalo.F90 @@ -58,7 +58,7 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_csphalo - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -195,8 +195,12 @@ Subroutine psb_csphalo(a,desc_a,blk,info,rowcnv,colcnv,& counter = counter+n_el_send+3 Enddo - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='mpi_alltoall' diff --git a/base/tools/psb_dcdbldext.F90 b/base/tools/psb_dcdbldext.F90 index d69f21b1..7ba68355 100644 --- a/base/tools/psb_dcdbldext.F90 +++ b/base/tools/psb_dcdbldext.F90 @@ -63,6 +63,7 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_dcdbldext use psi_mod + use psb_caf_mod #ifdef MPI_MOD use mpi @@ -468,8 +469,12 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype) ! accumulated RECV requests, we have an all-to-all to build ! matchings SENDs. ! - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & - & psb_mpi_def_integer,icomm,minfo) + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & + & psb_mpi_def_integer,icomm,minfo) + endif if (minfo /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='mpi_alltoall') @@ -503,9 +508,12 @@ Subroutine psb_dcdbldext(a,desc_a,novr,desc_ov,info, extype) end if lworkr = max(iszr,1) end if - - call mpi_alltoallv(works,sdsz,bsdindx,psb_mpi_ipk_integer,& - & workr,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + if (if_caf) then + call caf_alltoallv(works,sdsz,bsdindx, workr,rvsz,brvindx, minfo) + else + call mpi_alltoallv(works,sdsz,bsdindx,psb_mpi_ipk_integer,& + & workr,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + endif if (minfo /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='mpi_alltoallv') diff --git a/base/tools/psb_dsphalo.F90 b/base/tools/psb_dsphalo.F90 index de17953b..485e66e4 100644 --- a/base/tools/psb_dsphalo.F90 +++ b/base/tools/psb_dsphalo.F90 @@ -58,7 +58,7 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_dsphalo - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -195,8 +195,12 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& counter = counter+n_el_send+3 Enddo - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='mpi_alltoall' @@ -278,13 +282,21 @@ Subroutine psb_dsphalo(a,desc_a,blk,info,rowcnv,colcnv,& goto 9999 end if - - call mpi_alltoallv(valsnd,sdsz,bsdindx,psb_mpi_r_dpk_,& - & acoo%val,rvsz,brvindx,psb_mpi_r_dpk_,icomm,minfo) - call mpi_alltoallv(iasnd,sdsz,bsdindx,psb_mpi_ipk_integer,& - & acoo%ia,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) - call mpi_alltoallv(jasnd,sdsz,bsdindx,psb_mpi_ipk_integer,& - & acoo%ja,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + if (if_caf) then + call caf_alltoallv(valsnd,sdsz,bsdindx, acoo%val, rvsz,& + & brvindx, minfo) + call caf_alltoallv(iasnd,sdsz,bsdindx, acoo%ia, rvsz,& + & brvindx, minfo) + call caf_alltoallv(jasnd,sdsz,bsdindx, acoo%ja, rvsz,& + & brvindx, minfo) + else + call mpi_alltoallv(valsnd,sdsz,bsdindx,psb_mpi_r_dpk_,& + & acoo%val,rvsz,brvindx,psb_mpi_r_dpk_,icomm,minfo) + call mpi_alltoallv(iasnd,sdsz,bsdindx,psb_mpi_ipk_integer,& + & acoo%ia,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + call mpi_alltoallv(jasnd,sdsz,bsdindx,psb_mpi_ipk_integer,& + & acoo%ja,rvsz,brvindx,psb_mpi_ipk_integer,icomm,minfo) + endif if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='mpi_alltoallv' diff --git a/base/tools/psb_scdbldext.F90 b/base/tools/psb_scdbldext.F90 index cec4b07e..d36318ae 100644 --- a/base/tools/psb_scdbldext.F90 +++ b/base/tools/psb_scdbldext.F90 @@ -63,6 +63,7 @@ Subroutine psb_scdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_scdbldext use psi_mod + use psb_caf_mod #ifdef MPI_MOD use mpi @@ -467,9 +468,13 @@ Subroutine psb_scdbldext(a,desc_a,novr,desc_ov,info, extype) ! Exchange data requests with everybody else: so far we have ! accumulated RECV requests, we have an all-to-all to build ! matchings SENDs. - ! - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & - & psb_mpi_def_integer,icomm,minfo) + ! + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1,minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & + & psb_mpi_def_integer,icomm,minfo) + endif if (minfo /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='mpi_alltoall') diff --git a/base/tools/psb_ssphalo.F90 b/base/tools/psb_ssphalo.F90 index 6c71928c..17ab5900 100644 --- a/base/tools/psb_ssphalo.F90 +++ b/base/tools/psb_ssphalo.F90 @@ -58,7 +58,7 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_ssphalo - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -195,8 +195,12 @@ Subroutine psb_ssphalo(a,desc_a,blk,info,rowcnv,colcnv,& counter = counter+n_el_send+3 Enddo - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='mpi_alltoall' diff --git a/base/tools/psb_zcdbldext.F90 b/base/tools/psb_zcdbldext.F90 index 0c5f6d57..2349a499 100644 --- a/base/tools/psb_zcdbldext.F90 +++ b/base/tools/psb_zcdbldext.F90 @@ -63,6 +63,7 @@ Subroutine psb_zcdbldext(a,desc_a,novr,desc_ov,info, extype) use psb_base_mod, psb_protect_name => psb_zcdbldext use psi_mod + use psb_caf_mod #ifdef MPI_MOD use mpi @@ -467,9 +468,13 @@ Subroutine psb_zcdbldext(a,desc_a,novr,desc_ov,info, extype) ! Exchange data requests with everybody else: so far we have ! accumulated RECV requests, we have an all-to-all to build ! matchings SENDs. - ! - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & - & psb_mpi_def_integer,icomm,minfo) + ! + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,rvsz,1, & + & psb_mpi_def_integer,icomm,minfo) + endif if (minfo /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='mpi_alltoall') diff --git a/base/tools/psb_zsphalo.F90 b/base/tools/psb_zsphalo.F90 index 00d7d4dd..68c23268 100644 --- a/base/tools/psb_zsphalo.F90 +++ b/base/tools/psb_zsphalo.F90 @@ -58,7 +58,7 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,& & rowscale,colscale,outfmt,data) use psb_base_mod, psb_protect_name => psb_zsphalo - + use psb_caf_mod #ifdef MPI_MOD use mpi #endif @@ -195,8 +195,12 @@ Subroutine psb_zsphalo(a,desc_a,blk,info,rowcnv,colcnv,& counter = counter+n_el_send+3 Enddo - call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& - & rvsz,1,psb_mpi_def_integer,icomm,minfo) + if (if_caf) then + call caf_alltoall(sdsz,rvsz,1, minfo) + else + call mpi_alltoall(sdsz,1,psb_mpi_def_integer,& + & rvsz,1,psb_mpi_def_integer,icomm,minfo) + endif if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='mpi_alltoall' diff --git a/test/fileread/Makefile b/test/fileread/Makefile index a5b078f1..330fc88d 100644 --- a/test/fileread/Makefile +++ b/test/fileread/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # Libraries used # LIBDIR=$(BASEDIR)/lib/ -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -L/opencoarray-1.7.4 -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -lcaf_mpi LDLIBS=$(PSBLDLIBS) FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG). diff --git a/test/fileread/runs/dfs.inp b/test/fileread/runs/dfs.inp index ac241a7e..d50a5e00 100644 --- a/test/fileread/runs/dfs.inp +++ b/test/fileread/runs/dfs.inp @@ -1,11 +1,11 @@ 11 Number of inputs -A_Z.mtx kivap005.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or +bcsstk26.mtx kivap005.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ or NONE sherman3_b.mtx http://www.cise.ufl.edu/research/sparse/matrices/index.html MM File format: MM: Matrix Market HB: Harwell-Boeing. CG BiCGSTAB Iterative method: BiCGSTAB CGS RGMRES BiCGSTABL BICG CG NONE BJAC Preconditioner NONE DIAG BJAC CSR Storage format CSR COO JAD -2 IPART: Partition method 0: BLK 2: graph (with Metis) +0 IPART: Partition method 0: BLK 2: graph (with Metis) 2 ISTOPC 00100 ITMAX -1 ITRACE diff --git a/test/integrationTest/Makefile b/test/integrationTest/Makefile index ece9c3d1..900a8d41 100644 --- a/test/integrationTest/Makefile +++ b/test/integrationTest/Makefile @@ -3,7 +3,7 @@ INCDIR=$(BASEDIR)/include include $(INCDIR)/Make.inc.psblas # # Libraries used -PFUNIT = /opt/pfunit/pfunit-coarrays-last +PFUNIT = /opt/pfunit/pfunit-marzo2017 FFLAGS += -I$(INCDIR) -I$(PFUNIT)/mod -ISuites LIBDIR=$(BASEDIR)/lib PSBLAS_LIB= -L/opencoarrays6.2 -L$(LIBDIR) -lcaf_mpi -lpsb_util -lpsb_base -llapack -lblas @@ -17,7 +17,7 @@ FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG). all: prova -prova.x: test_psb_dmatdist.o test_psb_dhalo.o test_psb_shalo.o test_psb_chalo.o test_psb_zhalo.o test_psb_reduce_nrm2.o test_psb_sum.o test_psb_max.o test_psb_amx.o test_psb_min.o test_psb_amn.o test_psb_broadcast.o driver.o +prova.x: test_psb_dmatdist.o test_psb_ihalo.o test_psb_dhalo.o test_psb_shalo.o test_psb_chalo.o test_psb_zhalo.o test_psb_reduce_nrm2.o test_psb_sum.o test_psb_max.o test_psb_amx.o test_psb_min.o test_psb_amn.o test_psb_broadcast.o test_psb_caf.o driver.o $(FCFLAGS)$(F90LINK) -g $(LOPT) -o $@ $^ -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -DUSE_CAF -lpfunit -lmpi %: %.x mpirun -np 8 ./$^ diff --git a/test/integrationTest/Suites/testSuites.inc b/test/integrationTest/Suites/testSuites.inc index 9eefe150..5e1340f2 100644 --- a/test/integrationTest/Suites/testSuites.inc +++ b/test/integrationTest/Suites/testSuites.inc @@ -1,3 +1,4 @@ +ADD_TEST_SUITE(test_psb_caf_suite) ADD_TEST_SUITE(test_psb_reduce_nrm2_suite) ADD_TEST_SUITE(test_psb_max_suite) ADD_TEST_SUITE(test_psb_amx_suite) @@ -5,6 +6,7 @@ ADD_TEST_SUITE(test_psb_min_suite) ADD_TEST_SUITE(test_psb_amn_suite) ADD_TEST_SUITE(test_psb_sum_suite) ADD_TEST_SUITE(test_psb_broadcast_suite) +ADD_TEST_SUITE(test_psb_ihalo_suite) ADD_TEST_SUITE(test_psb_dhalo_suite) ADD_TEST_SUITE(test_psb_shalo_suite) ADD_TEST_SUITE(test_psb_chalo_suite) diff --git a/test/integrationTest/test_psb_broadcast.pf b/test/integrationTest/test_psb_broadcast.pf index aa6152ef..86e977b8 100644 --- a/test/integrationTest/test_psb_broadcast.pf +++ b/test/integrationTest/test_psb_broadcast.pf @@ -347,7 +347,6 @@ subroutine test_psb_dbroadcast_s(this) double precision :: dat, check integer :: root, info, np, icontxt call prepare_test(dat,check,root,info, np, icontxt) - print*,'from test:', this_image(), dat call psb_bcast(icontxt, dat, root) @assertEqual(dat,check) call psb_exit(icontxt) diff --git a/test/integrationTest/test_psb_chalo.pf b/test/integrationTest/test_psb_chalo.pf index 0210f991..8ba231b4 100644 --- a/test/integrationTest/test_psb_chalo.pf +++ b/test/integrationTest/test_psb_chalo.pf @@ -2354,7 +2354,6 @@ subroutine test_psb_chalo_tran_8imgs_vect_b(this) !GETTING BACK X v = x%get_vect() - PRINT*,'-------', ME, V if ((me==1).or.(me==2)) then true = 1 else diff --git a/test/integrationTest/test_psb_dhalo.pf b/test/integrationTest/test_psb_dhalo.pf index a0137b6a..1c3d3ab2 100644 --- a/test/integrationTest/test_psb_dhalo.pf +++ b/test/integrationTest/test_psb_dhalo.pf @@ -2350,7 +2350,6 @@ subroutine test_psb_dhalo_tran_8imgs_vect_b(this) !GETTING BACK X v = x%get_vect() - PRINT*,'-------', ME, V if ((me==1).or.(me==2)) then true = 1 else diff --git a/test/integrationTest/test_psb_max.pf b/test/integrationTest/test_psb_max.pf index c6c5d09a..797242cd 100644 --- a/test/integrationTest/test_psb_max.pf +++ b/test/integrationTest/test_psb_max.pf @@ -416,9 +416,7 @@ subroutine test_psb_imax_s(this) Class(CafTestMethod), intent(inout) :: this integer :: dat, check, root, info, np, icontxt call prepare_test(dat,check,root,info, np, icontxt) - print*,'dat, check before', dat, check, this_image() call psb_max(icontxt, dat, root) - print*,'dat after', dat, this_image() @assertEqual(dat,check) call psb_exit(icontxt) end subroutine test_psb_imax_s @@ -442,9 +440,7 @@ subroutine test_psb_dmax_s(this) double precision :: dat, check integer :: root, info, np, icontxt call prepare_test(dat,check,root,info, np, icontxt) - print*,'dat, check before', dat, check, this_image() call psb_max(icontxt, dat, root) - print*,'dat after', dat, this_image() @assertEqual(dat,check) call psb_exit(icontxt) end subroutine test_psb_dmax_s @@ -535,9 +531,7 @@ subroutine test2_psb_imax_s(this) Class(CafTestMethod), intent(inout) :: this integer :: dat, check, root, info, np, icontxt call prepare_test2(dat,check,root,info, np, icontxt) - print*,'dat, check before', dat, check, this_image() call psb_max(icontxt, dat, root) - print*,'dat after', dat, this_image() @assertEqual(dat,check) call psb_exit(icontxt) end subroutine test2_psb_imax_s @@ -561,9 +555,7 @@ subroutine test2_psb_dmax_s(this) double precision :: dat, check integer :: root, info, np, icontxt call prepare_test2(dat,check,root,info, np, icontxt) - print*,'dat, check before', dat, check, this_image() call psb_max(icontxt, dat, root) - print*,'dat after', dat, this_image() @assertEqual(dat,check) call psb_exit(icontxt) end subroutine test2_psb_dmax_s diff --git a/test/integrationTest/test_psb_shalo.pf b/test/integrationTest/test_psb_shalo.pf index 55d49673..337507c6 100644 --- a/test/integrationTest/test_psb_shalo.pf +++ b/test/integrationTest/test_psb_shalo.pf @@ -2350,7 +2350,6 @@ subroutine test_psb_shalo_tran_8imgs_vect_b(this) !GETTING BACK X v = x%get_vect() - PRINT*,'-------', ME, V if ((me==1).or.(me==2)) then true = 1 else diff --git a/test/integrationTest/test_psb_zhalo.pf b/test/integrationTest/test_psb_zhalo.pf index 18f5e8cc..00ed1e40 100644 --- a/test/integrationTest/test_psb_zhalo.pf +++ b/test/integrationTest/test_psb_zhalo.pf @@ -2354,7 +2354,6 @@ subroutine test_psb_zhalo_tran_8imgs_vect_b(this) !GETTING BACK X v = x%get_vect() - PRINT*,'-------', ME, V if ((me==1).or.(me==2)) then true = 1 else diff --git a/test/pargen/Makefile b/test/pargen/Makefile index f462f0f0..88c8c7d1 100644 --- a/test/pargen/Makefile +++ b/test/pargen/Makefile @@ -4,7 +4,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(BASEDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -L/opencoarrays6.2 -lcaf_mpi -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -L/opencoarrays6.2 -lcaf_mpi -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such diff --git a/test/pargen/ppde2d.f90 b/test/pargen/ppde2d.f90 index 6569fef0..7f69242d 100644 --- a/test/pargen/ppde2d.f90 +++ b/test/pargen/ppde2d.f90 @@ -136,7 +136,7 @@ program ppde2d integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size real(psb_dpk_) :: err, eps - + real(psb_dpk_), allocatable :: v(:) ! other variables integer(psb_ipk_) :: info, i character(len=20) :: name,ch_err @@ -216,6 +216,11 @@ program ppde2d call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + if (iam == psb_root_) then + fname="sol.mtx" + v=xxv%get_vect() + call mm_array_write(v,"exact solution", info,iunit=138, filename=fname) + endif if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='solver routine' diff --git a/test/pargen/ppde3d.f90 b/test/pargen/ppde3d.f90 index 87787588..683cdefd 100644 --- a/test/pargen/ppde3d.f90 +++ b/test/pargen/ppde3d.f90 @@ -147,6 +147,7 @@ program ppde3d integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst integer(psb_long_int_k_) :: amatsize, precsize, descsize, d2size real(psb_dpk_) :: err, eps + real(psb_dpk_), allocatable :: v(:) ! other variables integer(psb_ipk_) :: info, i @@ -236,6 +237,11 @@ program ppde3d eps = 1.d-9 call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst) + if (iam == psb_root_) then + fname="sol3d.mtx" + v=xxv%get_vect() + call mm_array_write(v,"exact solution", info,iunit=138, filename=fname) + endif if(info /= psb_success_) then info=psb_err_from_subroutine_ diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 85b46bdd..b2d6f54c 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,8 +2,10 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO JAD -700 Domain size (acutal system is this**3) +10 Domain size (acutal system is this**3) 2 Stopping criterion 0404 MAXIT -1 ITRACE 002 IRST restart for RGMRES and BiCGSTABL + + diff --git a/test/unitTest/Makefile b/test/unitTest/Makefile deleted file mode 100644 index ed9c31f0..00000000 --- a/test/unitTest/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -BASEDIR=../.. -INCDIR=$(BASEDIR)/include -include $(INCDIR)/Make.inc.psblas -# -# Libraries used -PFUNIT = /opt/pfunit/pfunit-coarrays-last -FFLAGS += -I$(INCDIR) -I$(PFUNIT)/mod -ISuites -LIBDIR=$(BASEDIR)/lib -PSBLAS_LIB= -L/opencoarrays6.2 -L$(LIBDIR) -lcaf_mpi -lpsb_util -lpsb_base -llapack -lblas -LDLIBS=$(PSBLDLIBS) -# -# Compilers and such -# -CCOPT= -g -FINCLUDES=$(FMFLAG)$(INCDIR) $(FMFLAG). - - -all: test_psb_swapdata - -%: %.x - mpirun -np 12 ./$^ -%.x:%.o driver.o - $(FCFLAGS)$(F90LINK) -g $(LOPT) -o $@ $^ -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -DUSE_CAF -lpfunit -lmpi -#Create .F90 file -%.F90: %.pf - $(PFUNIT)/bin/pFUnitParser.py $< $@ -lmpi -#Create .o file -%.o: %.F90 - $(FC) -g -DUSE_PFUNIT -DUSE_CAF -c $(FFLAGS) $(FPPFLAGS) $^ $(PFUNIT)/include/driver.F90 -L$(PFUNIT)/lib $(PSBLAS_LIB) $(LDLIBS) -lpfunit -lmpi - - - -clean: - /bin/rm -f *.F90 *.o *.mod -verycleanlib: - (cd ../..; make veryclean) -lib: - (cd ../../; make library) - diff --git a/test/unitTest/Suites/testSuites.inc b/test/unitTest/Suites/testSuites.inc deleted file mode 100644 index 739c913f..00000000 --- a/test/unitTest/Suites/testSuites.inc +++ /dev/null @@ -1 +0,0 @@ -ADD_TEST_SUITE(test_psb_swapdata_suite) diff --git a/test/unitTest/test_psb_swapdata.pf b/test/unitTest/test_psb_swapdata.pf deleted file mode 100644 index 3f2678e9..00000000 --- a/test/unitTest/test_psb_swapdata.pf +++ /dev/null @@ -1,1631 +0,0 @@ -module test_psb_swapdata -use pfunit_mod -use psb_base_mod -implicit none -include 'mpif.h' -interface -subroutine psi_sswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) - use psi_mod, psb_protect_name => psi_sswap_xchg_v - use psb_error_mod - use psb_realloc_mod - use psb_desc_mod - use psb_penv_mod - use psb_s_base_vect_mod - use iso_fortran_env - implicit none - integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag - integer(psb_ipk_), intent(out) :: info - real(psb_spk_) :: y(:) - real(psb_spk_) :: beta - class(psb_xch_idx_type), intent(inout) :: xchg - ! locals - integer(psb_mpik_) :: ictxt, icomm, np, me,& - & proc_to_comm, p2ptag, iret - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& - & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself - integer :: count - real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:) - type(event_type), allocatable, save :: ufg(:)[:] - type(event_type), allocatable, save :: clear[:] - integer, save :: last_clear_count = 0 - logical :: swap_mpi, swap_sync, swap_send, swap_recv,& - & albf,do_send,do_recv - integer(psb_ipk_) :: ierr(5) - character(len=20) :: name - -end subroutine psi_sswap_xchg_v - -subroutine psi_sswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) - use psi_mod, psb_protect_name => psi_sswap_xchg_m - use psb_error_mod - use psb_realloc_mod - use psb_desc_mod - use psb_penv_mod - use psb_s_base_vect_mod - use iso_fortran_env - implicit none - integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag, m - integer(psb_ipk_), intent(out) :: info - real(psb_spk_) :: y(:,:) - real(psb_spk_) :: beta - class(psb_xch_idx_type), intent(inout) :: xchg - ! locals - integer(psb_mpik_) :: ictxt, icomm, np, me,& - & proc_to_comm, p2ptag, iret - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& - & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself - integer :: count - real(psb_spk_), allocatable, save :: buffer(:)[:], sndbuf(:) - type(event_type), allocatable, save :: ufg(:)[:] - type(event_type), allocatable, save :: clear[:] - integer, save :: last_clear_count = 0 - logical :: swap_mpi, swap_sync, swap_send, swap_recv,& - & albf,do_send,do_recv - integer(psb_ipk_) :: ierr(5) - character(len=20) :: name - -end subroutine psi_sswap_xchg_m -subroutine psi_dswap_xchg_v(iictxt,iicomm,flag,beta,y,xchg,info) - use psi_mod, psb_protect_name => psi_dswap_xchg_v - use psb_error_mod - use psb_realloc_mod - use psb_desc_mod - use psb_penv_mod - use psb_d_base_vect_mod - use iso_fortran_env - implicit none - integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag - integer(psb_ipk_), intent(out) :: info - real(psb_dpk_) :: y(:) - real(psb_dpk_) :: beta - class(psb_xch_idx_type), intent(inout) :: xchg - ! locals - integer(psb_mpik_) :: ictxt, icomm, np, me,& - & proc_to_comm, p2ptag, iret - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& - & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself - integer :: count - real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:) - type(event_type), allocatable, save :: ufg(:)[:] - type(event_type), allocatable, save :: clear[:] - integer, save :: last_clear_count = 0 - logical :: swap_mpi, swap_sync, swap_send, swap_recv,& - & albf,do_send,do_recv - integer(psb_ipk_) :: ierr(5) - character(len=20) :: name - -end subroutine psi_dswap_xchg_v -end interface - -interface -subroutine psi_dswap_xchg_m(iictxt,iicomm,flag,m,beta,y,xchg,info) - use psi_mod, psb_protect_name => psi_dswap_xchg_m - use psb_error_mod - use psb_realloc_mod - use psb_desc_mod - use psb_penv_mod - use psb_d_base_vect_mod - use iso_fortran_env - implicit none - integer(psb_ipk_), intent(in) :: iictxt,iicomm,flag,m - integer(psb_ipk_), intent(out) :: info - real(psb_dpk_) :: y(:,:) - real(psb_dpk_) :: beta - class(psb_xch_idx_type), intent(inout) :: xchg - ! locals - integer(psb_mpik_) :: ictxt, icomm, np, me,& - & proc_to_comm, p2ptag, iret - integer(psb_ipk_) :: nesd, nerv,& - & err_act, i, idx_pt, totsnd_, totrcv_,p1,p2,isz,rp1,rp2,& - & snd_pt, rcv_pt, pnti, n, ip, img, nxch, myself - integer :: count - real(psb_dpk_), allocatable, save :: buffer(:)[:], sndbuf(:) - type(event_type), allocatable, save :: ufg(:)[:] - type(event_type), allocatable, save :: clear[:] - integer, save :: last_clear_count = 0 - logical :: swap_mpi, swap_sync, swap_send, swap_recv,& - & albf,do_send,do_recv - integer(psb_ipk_) :: ierr(5) - character(len=20) :: name -end subroutine psi_dswap_xchg_m -end interface - -contains - -@test(nimgs=[std]) -subroutine test_psb_sswapdatav_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_spk_), allocatable :: val(:) - real(psb_spk_), allocatable :: y(:), check(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_s_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(y(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - y = x%get_vect() - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - y(mid +1)=y(mid+1) + 2.0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_sswap_xchg_v(iictxt,icomm,flag,0.0,y,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - - !Let's build the expected solution - if (allocated(check)) deallocate(check) - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 ) then - check(1:mid)=1.0 - check(mid + 1)=2.0 - else if (me == 2) then - check(1:mid)=2.0 - check(mid + 1)=1.0 - else - check(1)=0.0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*y)) - deallocate(vg,ia,val,y,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_sswapdatav_2imgs - -@test(nimgs=[std]) -subroutine test_psb_swapdatam_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: y(:,:), check(:), v(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - v = x%get_vect() - allocate(y(size(v,1),1)) - y(:,1)=v - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - y(mid +1,1)=y(mid+1,1) + 2.0d0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_dswap_xchg_m(iictxt,icomm,flag,1,0.0d0,y,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - - !Let's build the expected solution - if (allocated(check)) deallocate(check) - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 ) then - check(1:mid)=1.0d0 - check(mid + 1)=2.0d0 - else if (me == 2) then - check(1:mid)=2.0d0 - check(mid + 1)=1.0d0 - else - check(1)=0.0d0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*y(:,1))) - deallocate(vg,ia,val,y,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_swapdatam_2imgs - -@test(nimgs=[std]) -subroutine test_psb_swapdatav_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: y(:), check(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(y(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - y = x%get_vect() - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - y(mid +1)=y(mid+1) + 2.0d0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_dswap_xchg_v(iictxt,icomm,flag,0.0d0,y,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - - !Let's build the expected solution - if (allocated(check)) deallocate(check) - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 ) then - check(1:mid)=1.0d0 - check(mid + 1)=2.0d0 - else if (me == 2) then - check(1:mid)=2.0d0 - check(mid + 1)=1.0d0 - else - check(1)=0.0d0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*y)) - deallocate(vg,ia,val,y,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_swapdatav_2imgs - -@test(nimgs=[std]) -subroutine test_psb_swapdata_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: v(:), check(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - v = x%get_vect() - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_dswap_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - v = x%get_vect() - - !Let's build the expected solution - if (allocated(check)) deallocate(check) - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 ) then - check(1:mid)=1.0d0 - check(mid + 1)=2.0d0 - else if (me == 2) then - check(1:mid)=2.0d0 - check(mid + 1)=1.0d0 - else - check(1)=0.0d0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*v)) - deallocate(vg,ia,val,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_swapdata_2imgs - - -@test(nimgs=[std]) -subroutine test_psb_swapdata_4imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info, nz - integer, parameter :: nrows = 8 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:), ja(:), irw(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: v(:), check(:) - integer(psb_ipk_) :: iictxt, icomm, flag - class(psb_xch_idx_type), pointer :: xchg - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 4) then - print*,'You need at least 4 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - - !Use only 4 processes - !Assuming nrows is a multiple of 4 so mid is an integer - !Distribute equally to the two processes - - mid=nrows/4 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, 2*mid - vg(i)=1 - enddo - do i=2*mid + 1, 3*mid - vg(i)=2 - enddo - do i=3*mid+1, nrows - vg(i)=3 - enddo - - if (me == 1) nz = 5 - if (me == 2) nz = 7 - if (me == 3) nz = 7 - if (me == 4) nz = 5 - if (me > 4) nz = 0 - - allocate(ia(nz),ja(nz)) - - if (me == 1) then - - ia(1)=2 - ja(1)=1 - - ia(2)=1 - ja(2)=2 - - ia(3)=2 - ja(3)=3 - - ia(4)=1 - ja(4)=4 - - ia(5)=2 - ja(5)=5 - endif - - if (me == 2) then - - ia(1)=4 - ja(1)=1 - - ia(2)=3 - ja(2)=2 - - ia(3)=4 - ja(3)=3 - - ia(4)=3 - ja(4)=4 - - ia(5)=4 - ja(5)=5 - - ia(6)=3 - ja(6)=6 - - ia(7)=4 - ja(7)=6 - - endif - - if (me == 3) then - ia(1)=5 - ja(1)=2 - - ia(2)=6 - ja(2)=3 - - ia(3)=5 - ja(3)=4 - - ia(4)=6 - ja(4)=5 - - ia(5)=5 - ja(5)=6 - - ia(6)=6 - ja(6)=7 - - ia(7)=5 - ja(7)=8 - - - endif - - if (me == 4) then - ia(1)=7 - ja(1)=4 - - ia(2)=8 - ja(2)=5 - - ia(3)=7 - ja(3)=6 - - ia(4)=8 - ja(4)=7 - - ia(5)=7 - ja(5)=8 - endif - - - - do i=1,mid - val(i)=1. - enddo - do i= mid + 1, 2*mid - val(i)=2. - enddo - do i=2*mid + 1, 3*mid - val(i)=3. - enddo - do i=3*mid + 1, nrows - val(i)=4. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - - call psb_cdins(nz=nz,ia=ia,ja=ja, desc_a=desc_a, info=info) - - call psb_cdasb(desc_a, info) - - allocate(irw(nrows)) - do i=1,nrows - irw(i)=i - enddo - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=irw, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - if (me==1) nz = 5 - if (me==2) nz = 6 - if (me==3) nz = 7 - if (me==4) nz = 5 - if (me > 4) nz = 1 - allocate (check(nz)) - if (me == 1) then - check(1)=2 - check(2)=2 - check(3)=8 - check(4)=8 - check(5)=18 - endif - if (me == 2) then - check(1)=8 - check(2)=8 - check(3)=2 - check(4)=2 - check(5)=18 - check(6)=18 - endif - if (me == 3) then - check(1)=18 - check(2)=18 - check(3)=1 - check(4)=8 - check(5)=8 - check(6)=32 - check(7)=32 - - endif - if (me == 4) then - check(1)=32 - check(2)=32 - check(3)=8 - check(4)=18 - check(5)=18 - endif - - ! END OF SETUP - - call psb_barrier(icontxt) - - !We can do something better here - x%v%v = x%v%v*2*me - - call psb_barrier(icontxt) - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_dswap_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) - - - - call psb_barrier(icontxt) - v = x%get_vect() - - - - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*v)) - deallocate(vg,ia,val,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - - -end subroutine test_psb_swapdata_4imgs - -@test(nimgs=[std]) -subroutine test_psb_swapdata_8imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info, nz - integer, parameter :: nrows = 8 - integer :: icontxt, true - integer, allocatable :: vg(:), ia(:), ja(:), irw(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: v(:), check(:) - integer(psb_ipk_) :: iictxt, icomm, flag - class(psb_xch_idx_type), pointer :: xchg - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 8) then - print*,'You need at least 8 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - - !Use only 8 processes - !Each process has a line - - do i=1, nrows - vg(i)=i-1 - enddo - - if (me == 1) nz = 1 - if (me == 2) nz = 2 - if (me == 3) nz = 3 - if ((me >= 4).and.(me <= 8)) nz = 2 - if (me > 8) nz = 0 - allocate(ia(nz),ja(nz)) - - if (me == 1) then - - ia(1)=1 - ja(1)=6 - - endif - - if (me == 2) then - - ia(1)=2 - ja(1)=1 - - ia(2)=2 - ja(2)=7 - - endif - - if (me == 3) then - ia(1)=3 - ja(1)=1 - - ia(2)=3 - ja(2)=2 - - ia(3)=3 - ja(3)=8 - - endif - - if (me == 4) then - ia(1)=4 - ja(1)=2 - - ia(2)=4 - ja(2)=3 - endif - - if (me == 5) then - ia(1)=5 - ja(1)=3 - - ia(2)=5 - ja(2)=4 - endif - - if (me == 6) then - ia(1)=6 - ja(1)=4 - - ia(2)=6 - ja(2)=5 - endif - - if (me == 7) then - ia(1)=7 - ja(1)=5 - - ia(2)=7 - ja(2)=6 - endif - - if (me == 8) then - ia(1)=8 - ja(1)=6 - - ia(2)=8 - ja(2)=7 - endif - - - do i=1,nrows - val(i)=me - enddo - - - call psb_cdall(icontxt,desc_a,info, vg=vg) - call psb_cdins(nz=nz,ia=ia,ja=ja, desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - allocate(irw(nrows)) - do i=1,nrows - irw(i)=i - enddo - - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=irw, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - - - if (me == 1) nz = 2 - if (me == 2) nz = 3 - if (me == 3) nz = 4 - if ((me >= 4).and.(me <= 8)) nz = 3 - if (me > 8) nz = 1 - - if (allocated(check)) deallocate(check) - allocate (check(nz)) - if (me == 1) then - check(1)=2 - check(2)=12 - endif - if (me == 2) then - check(1)=4 - check(2)=2 - check(3)=14 - endif - if (me == 3) then - check(1)=6 - check(2)=2 - check(3)=4 - check(4)=16 - - endif - if (me == 4) then - check(1)=8 - check(2)=4 - check(3)=6 - endif - if (me == 5) then - check(1)=10 - check(2)=6 - check(3)=8 - endif - if (me == 6) then - check(1)=12 - check(2)=8 - check(3)=10 - endif - if (me == 7) then - check(1)=14 - check(2)=10 - check(3)=12 - endif - if (me == 8) then - check(1)=16 - check(2)=12 - check(3)=14 - endif - if (me > 8) then - check(1)=0 - endif - ! END OF SETUP - - - - call psb_barrier(icontxt) - !We can do something better here - x%v%v = x%v%v + me - - - call psb_barrier(icontxt) - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - call psi_dswap_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) - - - call psb_barrier(icontxt) - v = x%get_vect() - - - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*v)) - - deallocate(vg,ia,val,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - - -end subroutine test_psb_swapdata_8imgs - - -@test(nimgs=[std]) -subroutine test_psb_swapdata_8imgs_b(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info, nz - integer, parameter :: nrows = 8 - integer :: icontxt, true - integer, allocatable :: vg(:), ia(:), ja(:), irw(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: v(:), check(:) - integer(psb_ipk_) :: iictxt, icomm, flag - class(psb_xch_idx_type), pointer :: xchg - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 8) then - print*,'You need at least 8 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - - !Use only 8 processes - !Each process has a line - - do i=1, nrows - vg(i)=i-1 - enddo - - if (me == 1) nz = 7 - if ((me >= 2).and.(me <= 8)) nz = 1 - if (me > 8) nz = 0 - allocate(ia(nz),ja(nz)) - - if (me == 1) then - - ia(1)=1 - ja(1)=2 - - ia(2)=1 - ja(2)=3 - - ia(3)=1 - ja(3)=4 - - ia(4)=1 - ja(4)=5 - - ia(5)=1 - ja(5)=6 - - ia(6)=1 - ja(6)=7 - - ia(7)=1 - ja(7)=8 - - endif - - if (me == 2) then - ia(1)=2 - ja(1)=1 - endif - - if (me == 3) then - ia(1)=3 - ja(1)=1 - endif - - if (me == 4) then - ia(1)=4 - ja(1)=1 - endif - - if (me == 5) then - ia(1)=5 - ja(1)=1 - endif - - if (me == 6) then - ia(1)=6 - ja(1)=1 - endif - - if (me == 7) then - ia(1)=7 - ja(1)=1 - endif - - if (me == 8) then - ia(1)=8 - ja(1)=1 - endif - - - do i=1,nrows - val(i)=me - enddo - - - call psb_cdall(icontxt,desc_a,info, vg=vg) - call psb_cdins(nz=nz,ia=ia,ja=ja, desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - allocate(irw(nrows)) - do i=1,nrows - irw(i)=i - enddo - - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=irw, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - - - if (me == 1) nz = 8 - if ((me >= 2).and.(me <= 8)) nz = 2 - if (me > 8) nz = 1 - - if (allocated(check)) deallocate(check) - allocate (check(nz)) - if (me == 1) then - check(1)=2 - check(2)=4 - check(3)=6 - check(4)=8 - check(5)=10 - check(6)=12 - check(7)=14 - check(8)=16 - endif - if (me == 2) then - check(1)=4 - check(2)=2 - endif - if (me == 3) then - check(1)=6 - check(2)=2 - - endif - if (me == 4) then - check(1)=8 - check(2)=2 - endif - if (me == 5) then - check(1)=10 - check(2)=2 - endif - if (me == 6) then - check(1)=12 - check(2)=2 - endif - if (me == 7) then - check(1)=14 - check(2)=2 - endif - if (me == 8) then - check(1)=16 - check(2)=2 - endif - if (me > 8) then - check(1)=0 - endif - ! END OF SETUP - - - - call psb_barrier(icontxt) - !We can do something better here - x%v%v = x%v%v + me - - - call psb_barrier(icontxt) - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - - call psi_dswap_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) - - - call psb_barrier(icontxt) - v = x%get_vect() - - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*v)) - - deallocate(vg,ia,val,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - - -end subroutine test_psb_swapdata_8imgs_b - - -@test(nimgs=[std]) -subroutine test_psb_swapdatatran_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_dpk_), allocatable :: val(:) - real(psb_dpk_), allocatable :: v(:), check(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_d_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - !v = x%get_vect() - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - call psb_geaxpby(dble(me),x,0.0d0,x,desc_a,info) - !x%v%v(mid +1)=x%v%v(mid+1) + 2.0d0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - v = x%get_vect() - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - sync all - - call psi_dswaptran_xchg_vect(iictxt,icomm,flag,0.0d0,x%v,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - v = x%get_vect() - sync all - - !Let's build the expected solution - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 )then - check(1:mid)=1.0d0 - check(mid + 1)=2.0d0 - else if (me == 2) then - check(1)=2.0d0 - check(mid-1:mid)=4.0d0 - check(mid + 1)=1.0d0 - else - check(1)=0.0d0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - @assertEqual(real(true*check),real(true*v)) - deallocate(vg,ia,val,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_swapdatatran_2imgs - -@test(nimgs=[std]) -subroutine test_psb_sswapdatam_2imgs(this) - implicit none - Class(CafTestMethod), intent(inout) :: this - integer :: msg, me, i=0, np, j, info - integer, parameter :: nrows=6 - integer :: icontxt, mid, true - integer, allocatable :: vg(:), ia(:) - real(psb_spk_), allocatable :: val(:) - real(psb_spk_), allocatable :: y(:,:), check(:), v(:) - class(psb_xch_idx_type), pointer :: xchg - integer(psb_ipk_) :: iictxt, icomm, flag - type(psb_desc_type):: desc_a - type(psb_s_vect_type) :: x - - np = this%getNumImages() - if (np < 2) then - print*,'You need at least 2 processes to run this test.' - return - endif - call psb_init(icontxt,np,MPI_COMM_WORLD) - !call psb_info(icontxt, me, np) - me = this_image() - !Allocate vectors - allocate(vg(nrows)) - allocate(ia(nrows)) - allocate(val(nrows)) - allocate(v(nrows)) - i = 0 - do j=1,size(vg,1) - vg(j)= i - i = i+1 - if (i==np) then - i=0 - endif - enddo - - - !Use only 2 processes - !Assuming nrows is a multiple of 2 so mid is an integer - !Distribute equally to the two processes - mid=nrows/2 - - do i=1, mid - vg(i)=0 - enddo - do i=mid+1, nrows - vg(i)=1 - enddo - - - do i=1,size(ia,1) - ia(i)=i - enddo - - do i=1,mid - val(i)=1. - enddo - - do i=mid + 1,nrows - val(i)=2. - enddo - - call psb_cdall(icontxt,desc_a,info, vg=vg) - if ( me == 1) call psb_cdins(nz=1,ia=(/mid/),ja=(/mid+1/), desc_a=desc_a, info=info) - if ( me == 2) call psb_cdins(nz=1,ia=(/mid+1/),ja=(/mid/), desc_a=desc_a, info=info) - call psb_cdasb(desc_a, info) - - call psb_geall(x,desc_a,info) - call psb_geins(m=nrows, irw=ia, val=val,x=x, desc_a=desc_a, info=info) - call psb_geasb(x,desc_a,info) - - call psb_barrier(icontxt) - v = x%get_vect() - allocate(y(size(v,1),1)) - y(:,1)=v - !Let's modify x, so we need to update halo indices - - if ((me == 1).or.(me == 2)) then - y(mid +1,1)=y(mid+1,1) + 2.0 - endif - call psb_barrier(icontxt) - - ! END OF SETUP - - - iictxt = desc_a%get_context() - - icomm = desc_a%get_mpic() - - call desc_a%get_list(psb_comm_halo_,xchg,info) - - flag = IOR(psb_swap_send_, psb_swap_recv_) - - print*,'size of y', size(y,1), size(y,2) - call psi_sswap_xchg_m(iictxt,icomm,flag,1,0.0,y,xchg,info) - !GETTING BACK X - call psb_barrier(icontxt) - - !Let's build the expected solution - if (allocated(check)) deallocate(check) - if ((me == 1).or.(me==2)) then - allocate(check(mid+1)) - else - allocate(check(1)) - endif - if (me == 1 ) then - check(1:mid)=1.0 - check(mid + 1)=2.0 - else if (me == 2) then - check(1:mid)=2.0 - check(mid + 1)=1.0 - else - check(1)=0.0 - endif - !call psb_barrier(icontxt) - - if ((me==1).or.(me==2)) then - true = 1 - else - true=0 - endif - - @assertEqual(real(true*check),real(true*y(:,1))) - deallocate(vg,ia,val,y,v,check) - - call psb_gefree(x, desc_a, info) - call psb_cdfree(desc_a, info) - - call psb_exit(icontxt) - -end subroutine test_psb_sswapdatam_2imgs - - -end module test_psb_swapdata - diff --git a/util/psb_d_mat_dist_impl.f90 b/util/psb_d_mat_dist_impl.f90 index 719fd407..27e40b33 100644 --- a/util/psb_d_mat_dist_impl.f90 +++ b/util/psb_d_mat_dist_impl.f90 @@ -83,6 +83,8 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& ! use psb_base_mod use psb_mat_mod + use iso_fortran_env + use psb_caf_mod implicit none ! parameters @@ -113,6 +115,14 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& integer(psb_ipk_), parameter :: nb=30 real(psb_dpk_) :: t0, t1, t2, t3, t4, t5 character(len=20) :: name, ch_err + !logical, parameter :: if_caf=.true. + + !CAF variables + integer(psb_ipk_), allocatable :: b_irow(:)[:],b_icol(:)[:] + real(psb_dpk_), allocatable :: b_val(:)[:] + integer(psb_ipk_), save :: b_ll[*], b_nnr[*] + type(event_type), save :: ll_done[*], transf_done[*] + info = psb_success_ err = 0 @@ -247,6 +257,94 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& ! now we should insert rows i_count..j_count-1 nnr = j_count - i_count + if (if_caf) then + if (iam == root) then + ll = 0 + do i= i_count, j_count-1 + call a_glob%csget(i,i,nz,& + & irow,icol,val,info,nzin=ll,append=.true.) + if (info /= psb_success_) then + if (nz >min(size(irow(ll+1:)),size(icol(ll+1:)),size(val(ll+1:)))) then + write(psb_err_unit,*) 'Allocation failure? This should not happen!' + end if + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ll = ll + nz + end do + + if (allocated(b_val)) deallocate(b_val) + if (allocated(b_irow)) deallocate(b_irow) + if (allocated(b_icol)) deallocate(b_icol) + + allocate(b_val(ll)[*], b_irow(ll)[*], b_icol(ll)[*]) + do k_count = 1, np_sharing + iproc = iwork(k_count) + !print*,'np_sharing', np_sharing + if (iproc == iam) then + call psb_spins(ll,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_spins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + + b_ll[iproc + 1]=ll + b_nnr[iproc + 1]=nnr + event post(ll_done[iproc+1]) + !print*,'root has post ll_done for', iproc+1 + b_val(1:ll)=val(1:ll) + b_irow(1:ll)=irow(1:ll) + b_icol(1:ll)=icol(1:ll) + event post(transf_done[iproc+1]) + !print*,'root has post transf_done for', iproc+1 + endif + end do + else if (iam /= root) then + + if (allocated(b_val)) deallocate(b_val) + if (allocated(b_irow)) deallocate(b_irow) + if (allocated(b_icol)) deallocate(b_icol) + + allocate(b_val(1)[*], b_irow(1)[*], b_icol(1)[*]) + do k_count = 1, np_sharing + iproc = iwork(k_count) + if (iproc == iam) then + !print*,iproc+1,' waiting for ll_done', np_sharing + event wait(ll_done) + !print*,iproc+1,'stopped waiting for ll_done' + ll = b_ll + nnr = b_nnr + if (ll > size(irow)) then + write(psb_err_unit,*) iam,'need to reallocate ',ll + deallocate(val,irow,icol) + allocate(val(ll),irow(ll),icol(ll),stat=info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='Allocate' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + !print*,iproc+1,' waiting for transf_done' + event wait(transf_done) + !print*,iproc+1,'stopped waiting for transf_done' + val(1:ll)=b_val(1:ll)[root+1] + icol(1:ll)=b_icol(1:ll)[root+1] + irow(1:ll)=b_irow(1:ll)[root+1] + call psb_spins(ll,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psspins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + end do + endif + else if (iam == root) then ll = 0 @@ -280,7 +378,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& call psb_snd(ictxt,irow(1:ll),iproc) call psb_snd(ictxt,icol(1:ll),iproc) call psb_snd(ictxt,val(1:ll),iproc) - call psb_rcv(ictxt,ll,iproc) + !call psb_rcv(ictxt,ll,iproc) endif end do else if (iam /= root) then @@ -305,7 +403,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& call psb_rcv(ictxt,irow(1:ll),root) call psb_rcv(ictxt,icol(1:ll),root) call psb_rcv(ictxt,val(1:ll),root) - call psb_snd(ictxt,ll,root) + !call psb_snd(ictxt,ll,root) call psb_spins(ll,irow,icol,val,a,desc_a,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ @@ -316,6 +414,7 @@ subroutine psb_dmatdist(a_glob, a, ictxt, desc_a,& endif end do endif + endif i_count = j_count end do