From d9997e511f3cf270a4621423a9df587ba3d8cb9d Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 26 May 2020 16:07:13 +0200 Subject: [PATCH 1/8] Fixed alterations for MAP routines and new internals --- base/modules/psi_i_mod.F90 | 25 +++++++++++-------------- base/tools/psb_c_map.f90 | 5 +++-- base/tools/psb_d_map.f90 | 5 +++-- base/tools/psb_s_map.f90 | 5 +++-- base/tools/psb_z_map.f90 | 5 +++-- 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index 9239617d..41bc92f6 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -125,24 +125,21 @@ module psi_i_mod integer(psb_ipk_), allocatable, intent(out) :: dep_list(:,:), length_dl(:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_extract_dep_list - end interface psi_extract_dep_list + end interface - interface psi_bld_glb_dep_list - subroutine psi_i_bld_glb_dep_list(ictxt,loc_dl,& - & length_dl,dep_list,dl_lda,info) + interface psi_bld_glb_dep_list + subroutine psi_i_bld_glb_dep_list(ictxt,loc_dl,length_dl,dep_list,dl_lda,info) import - implicit none integer(psb_ipk_), intent(in) :: ictxt integer(psb_ipk_), intent(out) :: dl_lda - integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(:) + integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) integer(psb_ipk_), allocatable, intent(out) :: dep_list(:,:) integer(psb_ipk_), intent(out) :: info - end subroutine psi_i_bld_glb_dep_list - end interface psi_bld_glb_dep_list - - interface psi_extract_loc_dl - subroutine psi_i_xtr_loc_dl(ictxt,is_bld,is_upd,desc_str,loc_dl,& - & length_dl,info) + end subroutine psi_i_bld_glb_dep_list + end interface + + interface psi_extract_loc_dl + subroutine psi_i_xtr_loc_dl(ictxt,is_bld,is_upd,desc_str,loc_dl,length_dl,info) import logical, intent(in) :: is_bld, is_upd integer(psb_ipk_), intent(in) :: ictxt @@ -150,8 +147,8 @@ module psi_i_mod integer(psb_ipk_), allocatable, intent(out) :: loc_dl(:), length_dl(:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_xtr_loc_dl - end interface psi_extract_loc_dl - + end interface + interface psi_fnd_owner subroutine psi_i_fnd_owner(nv,idx,iprc,desc,info) import diff --git a/base/tools/psb_c_map.f90 b/base/tools/psb_c_map.f90 index 83a54d32..6324b944 100644 --- a/base/tools/psb_c_map.f90 +++ b/base/tools/psb_c_map.f90 @@ -159,7 +159,7 @@ subroutine psb_c_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%p_desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.present(vty)) call yt%free(info) @@ -173,6 +173,7 @@ subroutine psb_c_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) nc1 = map%desc_U%get_local_cols() nr2 = map%desc_V%get_global_rows() nc2 = map%desc_V%get_local_cols() + if (present(vtx).and.present(vty)) then ptx => vtx pty => vty @@ -193,7 +194,7 @@ subroutine psb_c_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.(present(vtx).and.present(vty) )) then diff --git a/base/tools/psb_d_map.f90 b/base/tools/psb_d_map.f90 index 51672121..3046482e 100644 --- a/base/tools/psb_d_map.f90 +++ b/base/tools/psb_d_map.f90 @@ -159,7 +159,7 @@ subroutine psb_d_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%p_desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.present(vty)) call yt%free(info) @@ -173,6 +173,7 @@ subroutine psb_d_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) nc1 = map%desc_U%get_local_cols() nr2 = map%desc_V%get_global_rows() nc2 = map%desc_V%get_local_cols() + if (present(vtx).and.present(vty)) then ptx => vtx pty => vty @@ -193,7 +194,7 @@ subroutine psb_d_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.(present(vtx).and.present(vty) )) then diff --git a/base/tools/psb_s_map.f90 b/base/tools/psb_s_map.f90 index 6fa9b7b7..1d10b879 100644 --- a/base/tools/psb_s_map.f90 +++ b/base/tools/psb_s_map.f90 @@ -159,7 +159,7 @@ subroutine psb_s_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%p_desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.present(vty)) call yt%free(info) @@ -173,6 +173,7 @@ subroutine psb_s_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) nc1 = map%desc_U%get_local_cols() nr2 = map%desc_V%get_global_rows() nc2 = map%desc_V%get_local_cols() + if (present(vtx).and.present(vty)) then ptx => vtx pty => vty @@ -193,7 +194,7 @@ subroutine psb_s_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.(present(vtx).and.present(vty) )) then diff --git a/base/tools/psb_z_map.f90 b/base/tools/psb_z_map.f90 index 86858c60..6b07401f 100644 --- a/base/tools/psb_z_map.f90 +++ b/base/tools/psb_z_map.f90 @@ -159,7 +159,7 @@ subroutine psb_z_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%p_desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.present(vty)) call yt%free(info) @@ -173,6 +173,7 @@ subroutine psb_z_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) nc1 = map%desc_U%get_local_cols() nr2 = map%desc_V%get_global_rows() nc2 = map%desc_V%get_local_cols() + if (present(vtx).and.present(vty)) then ptx => vtx pty => vty @@ -193,7 +194,7 @@ subroutine psb_z_map_U2V_v(alpha,x,beta,y,map,info,work,vtx,vty) end if if (info == psb_success_) call psb_geaxpby(alpha,pty,beta,y,map%desc_V,info) if (info /= psb_success_) then - write(psb_err_unit,*) iam,' ',trim(name),' Error from inner routines',info + write(psb_err_unit,*) trim(name),' Error from inner routines',info info = -1 else if (.not.(present(vtx).and.present(vty) )) then From 167f4157bbbb383f3e28b2620d7465ddba5bf8cf Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 29 May 2020 16:54:19 +0200 Subject: [PATCH 2/8] Improve graph_fnd_owner --- base/internals/psi_graph_fnd_owner.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index b951945e..d31a8f2f 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -216,7 +216,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! sample query is then sent to all processes ! ! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nreqst_max - nsampl_in = psb_cd_get_samplesize() + nsampl_in = nreqst nsampl_in = min(max(1,(maxspace+np-1)/np),nsampl_in) ! ! Choose a sample, should it be done in this simplistic way? From 56189f39fdf8a76c97d7ec7f56929b9a558c1c8a Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Sat, 30 May 2020 20:01:26 +0200 Subject: [PATCH 3/8] Start reworking of handling of dependency lists. --- base/internals/psi_bld_glb_dep_list.F90 | 96 +++++++++++++++++++++++++ base/internals/psi_crea_index.f90 | 81 +++++++++++++-------- base/internals/psi_sort_dl.f90 | 14 ++++ base/modules/psi_i_mod.F90 | 17 ++++- 4 files changed, 177 insertions(+), 31 deletions(-) diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90 index 08311f2b..fb4f9f32 100644 --- a/base/internals/psi_bld_glb_dep_list.F90 +++ b/base/internals/psi_bld_glb_dep_list.F90 @@ -117,3 +117,99 @@ subroutine psi_i_bld_glb_dep_list(ictxt,loc_dl,length_dl,dep_list,dl_lda,info) return end subroutine psi_i_bld_glb_dep_list + +subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) + use psi_mod, psb_protect_name => psi_i_bld_glb_csr_dep_list +#ifdef MPI_MOD + use mpi +#endif + use psb_penv_mod + use psb_const_mod + use psb_error_mod + use psb_desc_mod + use psb_sort_mod + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + ! ....scalar parameters... + integer(psb_ipk_), intent(in) :: ictxt + integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) + integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) + integer(psb_ipk_), intent(out) :: info + + + ! .....local arrays.... + integer(psb_ipk_) :: int_err(5) + + ! .....local scalars... + integer(psb_ipk_) :: i, proc,j,err_act, length, myld + integer(psb_ipk_) :: err + integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_mpk_) :: iictxt, icomm, me, np, minfo + logical, parameter :: dist_symm_list=.false., print_dl=.false. + character name*20 + name='psi_bld_glb_csr_dep_list' + + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + iictxt = ictxt + info = psb_success_ + + call psb_info(iictxt,me,np) + + myld = length_dl(me) + length = sum(length_dl(0:np-1)) + allocate(dl_ptr(0:np),stat=info) + dl_ptr(0) = 0 + do i=1, np + dl_ptr(i) = dl_ptr(i-1) + length_dl(i-1) + end do + + if (length /= dl_ptr(np)) then + write(0,*) me,trim(name),' Inconsistency: ',length,dl_ptr(np) + end if + if (debug_level >= psb_debug_inner_) & + & write(debug_unit,*) me,' ',trim(name),': Dep_list length ',length_dl(me) + + allocate(c_dep_list(length),stat=info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate') + goto 9999 + end if + icomm = psb_get_mpi_comm(iictxt) + call mpi_allgather(loc_dl,myld,psb_mpi_ipk_,& + & c_dep_list,length_dl,dl_ptr,psb_mpi_ipk_,icomm,minfo) + + info = minfo + if (info /= psb_success_) then + info=psb_err_internal_error_ + goto 9999 + endif + dl_ptr = dl_ptr + 1 + if (print_dl) then + if (me == 0) then + write(0,*) ' Dep_list ' + do i=0,np-1 + write(0,*) 'Proc ',i,':',c_dep_list(dl_ptr(i):dl_ptr(i+1)-1) + end do + flush(0) + end if + call psb_barrier(ictxt) + end if + + call psb_erractionrestore(err_act) + return + + +9999 continue + + call psb_errpush(info,name,i_err=int_err) + call psb_error_handler(err_act) + + return + +end subroutine psi_i_bld_glb_csr_dep_list diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index f9df4014..eb22893e 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -66,7 +66,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) ! ....local scalars... integer(psb_ipk_) :: ictxt, me, np, mode, err_act, dl_lda, ldl ! ...parameters... - integer(psb_ipk_), allocatable :: dep_list(:,:), length_dl(:), loc_dl(:) + integer(psb_ipk_), allocatable :: dep_list(:,:), length_dl(:), loc_dl(:), c_dep_list(:), dl_ptr(:) integer(psb_ipk_) :: dlmax, dlavg integer(psb_ipk_),parameter :: root=psb_root_,no_comm=-1 integer(psb_ipk_) :: debug_level, debug_unit @@ -154,35 +154,56 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) !!$ & ' avg:',dlavg, choose_sorting(dlmax,dlavg,np) if (choose_sorting(dlmax,dlavg,np)) then - - call psi_bld_glb_dep_list(ictxt,& - & loc_dl,length_dl,dep_list,dl_lda,info) - - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='extrct_dl') - goto 9999 - end if - - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': from extract_dep_list',& - & me,length_dl(0),index_in(1), ':',dep_list(:length_dl(me),me) - ! ...now process root contains dependence list of all processes... - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': root sorting dep list' - if (do_timings) call psb_toc(idx_phase1) - if (do_timings) call psb_tic(idx_phase2) - - call psi_dl_check(dep_list,dl_lda,np,length_dl) - - ! ....now i can sort dependency lists. - call psi_sort_dl(dep_list,length_dl,np,info) - if(info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') - goto 9999 + if (.true.) then + call psi_bld_glb_dep_list(ictxt,& + & loc_dl,length_dl,dep_list,dl_lda,info) + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='extrct_dl') + goto 9999 + end if + + if (debug_level >= psb_debug_inner_) & + & write(debug_unit,*) me,' ',trim(name),': from extract_dep_list',& + & me,length_dl(0),index_in(1), ':',dep_list(:length_dl(me),me) + ! ...now process root contains dependence list of all processes... + if (debug_level >= psb_debug_inner_) & + & write(debug_unit,*) me,' ',trim(name),': root sorting dep list' + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_phase2) + + ! + ! The dependency list has been symmetrized inside xtract_loc_dl + ! +!!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) + + ! ....now i can sort dependency lists. + call psi_sort_dl(dep_list,length_dl,np,info) + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') + goto 9999 + end if + if (do_timings) call psb_toc(idx_phase2) + ldl = length_dl(me) + loc_dl = dep_list(1:ldl,me) + else + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_phase2) + call psi_bld_glb_dep_list(ictxt,& + & loc_dl,length_dl,c_dep_list,dl_ptr,info) + +!!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) +!!$ +!!$ ! ....now i can sort dependency lists. + call psi_sort_dl(dl_ptr,c_dep_list,length_dl,np,info) +!!$ if(info /= psb_success_) then +!!$ call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') +!!$ goto 9999 +!!$ end if + if (do_timings) call psb_toc(idx_phase2) + + end if - if (do_timings) call psb_toc(idx_phase2) - ldl = length_dl(me) - loc_dl = dep_list(1:ldl,me) else ! Do nothing ldl = length_dl(me) @@ -229,5 +250,5 @@ contains val = .not.(((dlmax>(26*4)).or.((dlavg>=(26*2)).and.(np>=128)))) end function choose_sorting - + end subroutine psi_i_crea_index diff --git a/base/internals/psi_sort_dl.f90 b/base/internals/psi_sort_dl.f90 index 4c2b78db..60212d30 100644 --- a/base/internals/psi_sort_dl.f90 +++ b/base/internals/psi_sort_dl.f90 @@ -89,5 +89,19 @@ subroutine psi_i_sort_dl(dep_list,l_dep_list,np,info) end subroutine psi_i_sort_dl +subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) + use psi_mod, psb_protect_name => psi_i_csr_sort_dl + use psb_const_mod + use psb_error_mod + implicit none + + integer(psb_ipk_), intent(in) :: c_dep_list(:), dl_ptr(0:) + integer(psb_ipk_), intent(inout) :: l_dep_list(0:) + integer(psb_ipk_), intent(in) :: np + integer(psb_ipk_), intent(out) :: info + ! Local variables + + +end subroutine psi_i_csr_sort_dl diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index 41bc92f6..7a247f8d 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -111,6 +111,14 @@ module psi_i_mod integer(psb_ipk_) :: np integer(psb_ipk_) :: info end subroutine psi_i_sort_dl + subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) + import + implicit none + integer(psb_ipk_), intent(in) :: c_dep_list(:), dl_ptr(0:) + integer(psb_ipk_), intent(inout) :: l_dep_list(0:) + integer(psb_ipk_), intent(in) :: np + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i_csr_sort_dl end interface interface psi_extract_dep_list @@ -136,7 +144,14 @@ module psi_i_mod integer(psb_ipk_), allocatable, intent(out) :: dep_list(:,:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_bld_glb_dep_list - end interface + subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) + import + integer(psb_ipk_), intent(in) :: ictxt + integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) + integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i_bld_glb_csr_dep_list + end interface psi_bld_glb_dep_list interface psi_extract_loc_dl subroutine psi_i_xtr_loc_dl(ictxt,is_bld,is_upd,desc_str,loc_dl,length_dl,info) From 781f0ef08330bcdb1064972609efc280a099e723 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 1 Jun 2020 10:22:09 +0200 Subject: [PATCH 4/8] New version of sort_dep_list. --- base/internals/psi_bld_glb_dep_list.F90 | 2 +- base/internals/psi_crea_index.f90 | 9 +- base/internals/psi_sort_dl.f90 | 183 +++++++++++++++++++++++- base/internals/psi_srtlist.f90 | 1 - 4 files changed, 186 insertions(+), 9 deletions(-) diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90 index fb4f9f32..bf28e49b 100644 --- a/base/internals/psi_bld_glb_dep_list.F90 +++ b/base/internals/psi_bld_glb_dep_list.F90 @@ -181,7 +181,7 @@ subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,i goto 9999 end if icomm = psb_get_mpi_comm(iictxt) - call mpi_allgather(loc_dl,myld,psb_mpi_ipk_,& + call mpi_allgatherv(loc_dl,myld,psb_mpi_ipk_,& & c_dep_list,length_dl,dl_ptr,psb_mpi_ipk_,icomm,minfo) info = minfo diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index eb22893e..68c4615b 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -154,8 +154,8 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) !!$ & ' avg:',dlavg, choose_sorting(dlmax,dlavg,np) if (choose_sorting(dlmax,dlavg,np)) then - if (.true.) then - call psi_bld_glb_dep_list(ictxt,& + if (.false.) then + call psi_bld_glb_dep_list(ictxt,& & loc_dl,length_dl,dep_list,dl_lda,info) if (info /= psb_success_) then @@ -196,6 +196,9 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) !!$ !!$ ! ....now i can sort dependency lists. call psi_sort_dl(dl_ptr,c_dep_list,length_dl,np,info) + ldl = length_dl(me) + loc_dl = c_dep_list(dl_ptr(me):dl_ptr(me)+ldl-1) + !!$ if(info /= psb_success_) then !!$ call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') !!$ goto 9999 @@ -215,6 +218,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (do_timings) call psb_tic(idx_phase3) if(debug_level >= psb_debug_inner_)& & write(debug_unit,*) me,' ',trim(name),': calling psi_desc_index',ldl,':',loc_dl(1:ldl) + ! Do the actual format conversion. call psi_desc_index(desc_a,index_in,loc_dl,ldl,nsnd,nrcv,index_out,info) if(debug_level >= psb_debug_inner_) & @@ -249,6 +253,7 @@ contains logical :: val val = .not.(((dlmax>(26*4)).or.((dlavg>=(26*2)).and.(np>=128)))) + val = .true. end function choose_sorting end subroutine psi_i_crea_index diff --git a/base/internals/psi_sort_dl.f90 b/base/internals/psi_sort_dl.f90 index 60212d30..29a55b49 100644 --- a/base/internals/psi_sort_dl.f90 +++ b/base/internals/psi_sort_dl.f90 @@ -69,7 +69,6 @@ subroutine psi_i_sort_dl(dep_list,l_dep_list,np,info) isz = iich + ndgmx if (debug_level >= psb_debug_inner_)& & write(debug_unit,*) name,': ndgmx ',ndgmx,isz - allocate(work(isz)) ! call srtlist(dep_list, dl_lda, l_dep_list, np, info) call srtlist(dep_list,size(dep_list,1,kind=psb_ipk_),l_dep_list,np,work(idg),& @@ -89,19 +88,193 @@ subroutine psi_i_sort_dl(dep_list,l_dep_list,np,info) end subroutine psi_i_sort_dl +!********************************************************************** +! * +! The communication step among processors at each * +! matrix-vector product is a variable all-to-all * +! collective communication that we reimplement * +! in terms of point-to-point communications. * +! The data in input is a list of dependencies: * +! for each node a list of all the nodes it has to * +! communicate with. The lists are guaranteed to be * +! symmetric, i.e. for each pair (I,J) there is a * +! pair (J,I). The idea is to organize the ordering * +! so that at each communication step as many * +! processors as possible are communicating at the * +! same time, i.e. a step is defined by the fact * +! that all edges (I,J) in it have no common node. * +! * +! Formulation of the problem is: * +! Given an undirected graph (forest): * +! Find the shortest series of steps to cancel all * +! graph edges, where at each step all edges belonging * +! to a matching in the graph are canceled. * +! * +! An obvious lower bound to the optimum number of steps * +! is the largest degree of any node in the graph. * +! * +! The algorithm proceeds as follows: * +! 1. Build a list of all edges, e.g. copy the * +! dependencies lists keeping only (I,J) with I psi_i_csr_sort_dl use psb_const_mod use psb_error_mod + use psb_sort_mod implicit none - integer(psb_ipk_), intent(in) :: c_dep_list(:), dl_ptr(0:) - integer(psb_ipk_), intent(inout) :: l_dep_list(0:) + integer(psb_ipk_), intent(inout) :: c_dep_list(:), dl_ptr(0:), l_dep_list(0:) integer(psb_ipk_), intent(in) :: np integer(psb_ipk_), intent(out) :: info ! Local variables + integer(psb_ipk_), allocatable :: dg(:), dgp(:),& + & idx(:), upd(:), edges(:,:), ich(:) + integer(psb_ipk_) :: i, j, nedges, ip1, ip2, nch, ip, iedge,& + & i1, ix, ist, iswap(2) + nedges = size(c_dep_list) -end subroutine psi_i_csr_sort_dl + allocate(dg(0:np-1),dgp(nedges),edges(2,nedges),upd(0:np-1),& + & idx(nedges),ich(nedges),stat = info) + + if (info /= 0) then + info = -9 + return + end if + ! + ! 1. Compute an auxiliary vector with the degree of + ! each node of the graph. + dg(0:np-1) = l_dep_list(0:np-1) + ! + ! 2. Build a list of all edges, e.g. copy the + ! dependencies lists keeping only (I,J) with I Date: Tue, 2 Jun 2020 17:52:44 +0200 Subject: [PATCH 5/8] Fix sorting of dep_list: store in CSR-like, handle self-loops --- base/internals/psi_crea_index.f90 | 9 ++++++-- base/internals/psi_sort_dl.f90 | 36 ++++++++++++++++++++++--------- base/internals/psi_srtlist.f90 | 3 ++- base/modules/psi_i_mod.F90 | 14 ++++++------ 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 68c4615b..2c340805 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -191,11 +191,16 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (do_timings) call psb_tic(idx_phase2) call psi_bld_glb_dep_list(ictxt,& & loc_dl,length_dl,c_dep_list,dl_ptr,info) - + if (info /= 0) then + write(0,*) me,trim(name),' From bld_glb_list ',info + end if !!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) !!$ !!$ ! ....now i can sort dependency lists. call psi_sort_dl(dl_ptr,c_dep_list,length_dl,np,info) + if (info /= 0) then + write(0,*) me,trim(name),' From sort_dl ',info + end if ldl = length_dl(me) loc_dl = c_dep_list(dl_ptr(me):dl_ptr(me)+ldl-1) @@ -253,7 +258,7 @@ contains logical :: val val = .not.(((dlmax>(26*4)).or.((dlavg>=(26*2)).and.(np>=128)))) - val = .true. + !val = .true. end function choose_sorting end subroutine psi_i_crea_index diff --git a/base/internals/psi_sort_dl.f90 b/base/internals/psi_sort_dl.f90 index 29a55b49..0e7c276e 100644 --- a/base/internals/psi_sort_dl.f90 +++ b/base/internals/psi_sort_dl.f90 @@ -136,7 +136,7 @@ end subroutine psi_i_sort_dl ! node in the dependency list for the current one * ! * !********************************************************************** -subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) +subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,ictxt,info) use psi_mod, psb_protect_name => psi_i_csr_sort_dl use psb_const_mod use psb_error_mod @@ -144,14 +144,18 @@ subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) implicit none integer(psb_ipk_), intent(inout) :: c_dep_list(:), dl_ptr(0:), l_dep_list(0:) - integer(psb_ipk_), intent(in) :: np + integer(psb_ipk_), intent(in) :: ictxt integer(psb_ipk_), intent(out) :: info ! Local variables integer(psb_ipk_), allocatable :: dg(:), dgp(:),& & idx(:), upd(:), edges(:,:), ich(:) integer(psb_ipk_) :: i, j, nedges, ip1, ip2, nch, ip, iedge,& & i1, ix, ist, iswap(2) + logical :: internal_error + integer(psb_ipk_) :: me, np + info = 0 + call psb_info(ictxt,me,np) nedges = size(c_dep_list) allocate(dg(0:np-1),dgp(nedges),edges(2,nedges),upd(0:np-1),& @@ -173,7 +177,7 @@ subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) do i = 0, np-1 do j = dl_ptr(i),dl_ptr(i+1) - 1 ip = c_dep_list(j) - if (i= i) then iedge = iedge + 1 edges(1,iedge) = i edges(2,iedge) = ip @@ -187,6 +187,7 @@ subroutine srtlist(dep_list,dl_lda,ldl,np,dg,dgp,upd, edges,idx,ich,info) i = edges(2,j) dg(i) = dg(i)+1 dep_list(dg(i),i) = edges(1,j)-1 + if (edges(1,j) == edges(2,j)) dg(i) = dg(i) -1 enddo do i=1, np if (dg(i).ne.ldl(i)) then diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index 7a247f8d..35ff1316 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -111,13 +111,13 @@ module psi_i_mod integer(psb_ipk_) :: np integer(psb_ipk_) :: info end subroutine psi_i_sort_dl - subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,np,info) + subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,ictxt,info) import implicit none - integer(psb_ipk_), intent(in) :: c_dep_list(:), dl_ptr(0:) + integer(psb_ipk_), intent(in) :: c_dep_list(:), dl_ptr(0:) integer(psb_ipk_), intent(inout) :: l_dep_list(0:) - integer(psb_ipk_), intent(in) :: np - integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: ictxt + integer(psb_ipk_) :: info end subroutine psi_i_csr_sort_dl end interface @@ -145,13 +145,13 @@ module psi_i_mod integer(psb_ipk_), intent(out) :: info end subroutine psi_i_bld_glb_dep_list subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,info) - import + import integer(psb_ipk_), intent(in) :: ictxt integer(psb_ipk_), intent(in) :: loc_dl(:), length_dl(0:) - integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) + integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:) integer(psb_ipk_), intent(out) :: info end subroutine psi_i_bld_glb_csr_dep_list - end interface psi_bld_glb_dep_list + end interface interface psi_extract_loc_dl subroutine psi_i_xtr_loc_dl(ictxt,is_bld,is_upd,desc_str,loc_dl,length_dl,info) From ceb79d8fd850cd0949b276e36efce88f9a7a76c3 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Tue, 2 Jun 2020 18:09:15 +0200 Subject: [PATCH 6/8] Fix bad argument passing for sort_dl --- base/internals/psi_crea_index.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 2c340805..0f14c4aa 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -197,7 +197,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) !!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) !!$ !!$ ! ....now i can sort dependency lists. - call psi_sort_dl(dl_ptr,c_dep_list,length_dl,np,info) + call psi_sort_dl(dl_ptr,c_dep_list,length_dl,ictxt,info) if (info /= 0) then write(0,*) me,trim(name),' From sort_dl ',info end if From 6b2abed8bb3d6d3816c6bbe51f72749f52bbffe8 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 3 Jun 2020 11:13:36 +0200 Subject: [PATCH 7/8] Reworked dep_list sorting. Take out obsolete functions --- base/internals/Makefile | 6 +- base/internals/psi_crea_index.f90 | 123 ++++++------------------- base/internals/psi_dl_check.f90 | 95 ------------------- base/internals/psi_graph_fnd_owner.F90 | 54 +++++------ base/internals/psi_list_search.f90 | 58 ------------ base/internals/psi_sort_dl.f90 | 59 ------------ base/modules/psi_i_mod.F90 | 16 ---- 7 files changed, 57 insertions(+), 354 deletions(-) delete mode 100644 base/internals/psi_dl_check.f90 delete mode 100644 base/internals/psi_list_search.f90 diff --git a/base/internals/Makefile b/base/internals/Makefile index cf499f2d..cbb282a9 100644 --- a/base/internals/Makefile +++ b/base/internals/Makefile @@ -1,12 +1,12 @@ include ../../Make.inc FOBJS = psi_compute_size.o psi_crea_bnd_elem.o psi_crea_index.o \ - psi_crea_ovr_elem.o psi_bld_tmpovrl.o psi_dl_check.o \ + psi_crea_ovr_elem.o psi_bld_tmpovrl.o \ psi_bld_tmphalo.o psi_sort_dl.o \ psi_indx_map_fnd_owner.o \ - psi_desc_impl.o psi_hash_impl.o psi_list_search.o psi_srtlist.o \ + psi_desc_impl.o psi_hash_impl.o psi_srtlist.o \ psi_bld_glb_dep_list.o psi_xtr_loc_dl.o - +#psi_list_search.o psi_dl_check.o MPFOBJS = psi_desc_index.o psi_extrct_dl.o psi_fnd_owner.o psi_a2a_fnd_owner.o \ psi_graph_fnd_owner.o psi_adjcncy_fnd_owner.o psi_symm_dep_list.o diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 0f14c4aa..2b0a8321 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -108,117 +108,48 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (debug_level >= psb_debug_inner_) & & write(debug_unit,*) me,' ',trim(name),': calling extract_dep_list' mode = 1 - if (.false.) then - if (do_timings) call psb_tic(idx_phase1) + if (do_timings) call psb_tic(idx_phase1) - call psi_extract_dep_list(ictxt,& - & desc_a%is_bld(), desc_a%is_upd(),& - & index_in, dep_list,length_dl,dl_lda,mode,info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='extrct_dl') - goto 9999 - end if - - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': from extract_dep_list',& - & me,length_dl(0),index_in(1), ':',dep_list(:length_dl(me),me) - ! ...now process root contains dependence list of all processes... - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': root sorting dep list' - if (do_timings) call psb_toc(idx_phase1) - if (do_timings) call psb_tic(idx_phase2) - - call psi_dl_check(dep_list,dl_lda,np,length_dl) - - ! ....now i can sort dependency lists. - call psi_sort_dl(dep_list,length_dl,np,info) - if(info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') - goto 9999 - end if - if (do_timings) call psb_toc(idx_phase2) - ldl = length_dl(me) - loc_dl = dep_list(1:ldl,me) - - else - - if (do_timings) call psb_tic(idx_phase1) - - call psi_extract_loc_dl(ictxt,& - & desc_a%is_bld(), desc_a%is_upd(),& - & index_in, loc_dl,length_dl,info) + call psi_extract_loc_dl(ictxt,& + & desc_a%is_bld(), desc_a%is_upd(),& + & index_in, loc_dl,length_dl,info) - dlmax = maxval(length_dl(:)) - dlavg = (sum(length_dl(:))+np-1)/np + dlmax = maxval(length_dl(:)) + dlavg = (sum(length_dl(:))+np-1)/np !!$ if ((dlmax>0).and.(me==0)) write(0,*) 'Dependency list : max:',dlmax,& !!$ & ' avg:',dlavg, choose_sorting(dlmax,dlavg,np) - if (choose_sorting(dlmax,dlavg,np)) then - if (.false.) then - call psi_bld_glb_dep_list(ictxt,& - & loc_dl,length_dl,dep_list,dl_lda,info) + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_phase2) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='extrct_dl') - goto 9999 - end if - - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': from extract_dep_list',& - & me,length_dl(0),index_in(1), ':',dep_list(:length_dl(me),me) - ! ...now process root contains dependence list of all processes... - if (debug_level >= psb_debug_inner_) & - & write(debug_unit,*) me,' ',trim(name),': root sorting dep list' - if (do_timings) call psb_toc(idx_phase1) - if (do_timings) call psb_tic(idx_phase2) - - ! - ! The dependency list has been symmetrized inside xtract_loc_dl - ! -!!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) - - ! ....now i can sort dependency lists. - call psi_sort_dl(dep_list,length_dl,np,info) - if(info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') - goto 9999 - end if - if (do_timings) call psb_toc(idx_phase2) - ldl = length_dl(me) - loc_dl = dep_list(1:ldl,me) - else - if (do_timings) call psb_toc(idx_phase1) - if (do_timings) call psb_tic(idx_phase2) - call psi_bld_glb_dep_list(ictxt,& - & loc_dl,length_dl,c_dep_list,dl_ptr,info) - if (info /= 0) then - write(0,*) me,trim(name),' From bld_glb_list ',info - end if + if (choose_sorting(dlmax,dlavg,np)) then + call psi_bld_glb_dep_list(ictxt,& + & loc_dl,length_dl,c_dep_list,dl_ptr,info) + if (info /= 0) then + write(0,*) me,trim(name),' From bld_glb_list ',info + end if !!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) !!$ !!$ ! ....now i can sort dependency lists. - call psi_sort_dl(dl_ptr,c_dep_list,length_dl,ictxt,info) - if (info /= 0) then - write(0,*) me,trim(name),' From sort_dl ',info - end if - ldl = length_dl(me) - loc_dl = c_dep_list(dl_ptr(me):dl_ptr(me)+ldl-1) - + call psi_sort_dl(dl_ptr,c_dep_list,length_dl,ictxt,info) + if (info /= 0) then + write(0,*) me,trim(name),' From sort_dl ',info + end if + ldl = length_dl(me) + loc_dl = c_dep_list(dl_ptr(me):dl_ptr(me)+ldl-1) + !!$ if(info /= psb_success_) then !!$ call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') !!$ goto 9999 !!$ end if - if (do_timings) call psb_toc(idx_phase2) - - - end if - else - ! Do nothing - ldl = length_dl(me) - loc_dl = loc_dl(1:ldl) - end if + else + ! Do nothing + ldl = length_dl(me) + loc_dl = loc_dl(1:ldl) end if + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) if(debug_level >= psb_debug_inner_)& diff --git a/base/internals/psi_dl_check.f90 b/base/internals/psi_dl_check.f90 deleted file mode 100644 index bf25976b..00000000 --- a/base/internals/psi_dl_check.f90 +++ /dev/null @@ -1,95 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006-2018 -! Salvatore Filippone -! Alfredo Buttari -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -! -! File: psi_dl_check.f90 -! -! Subroutine: psi_dl_check -! Make sure a dependency list is symmetric, i.e. if process i depends on j -! then process j should depend on i (even if the data to be sent in one of the -! directions happens to be empty) -! -! Arguments: -! dep_list(:,:) - integer Initial dependency lists -! dl_lda - integer Allocated size of dep_list -! np - integer Total number of processes. -! length_dl(:) - integer Items in dependency lists; updated on -! exit -! -subroutine psi_i_dl_check(dep_list,dl_lda,np,length_dl) - - use psi_mod, psb_protect_name => psi_i_dl_check - use psb_const_mod - use psb_desc_mod - implicit none - - integer(psb_ipk_) :: np,dl_lda,length_dl(0:np) - integer(psb_ipk_) :: dep_list(dl_lda,0:np) - ! locals - integer(psb_ipk_) :: proc, proc2, i, j - - - ! ...if j is in dep_list of process i - ! and i is not in dep_list of process j - ! fix it. - - do proc=0,np-1 - i=1 - outer: do - if (i >length_dl(proc)) exit outer - proc2=dep_list(i,proc) - if ((proc2 /= -1).and.(proc2 /= proc)) then - ! ...search proc in proc2's dep_list.... - j=1 - p2loop:do - if (j > length_dl(proc2)) exit p2loop - if (dep_list(j,proc2) == proc) exit p2loop - j=j+1 - enddo p2loop - - if (j > length_dl(proc2)) then - ! ...add proc to proc2 s dep_list.....',proc,proc2 - length_dl(proc2) = length_dl(proc2)+1 - if (length_dl(proc2) > size(dep_list,1)) then - write(psb_err_unit,*)'error in dl_check', proc2,proc,& - & length_dl(proc2),'>',size(dep_list,1) - endif - dep_list(length_dl(proc2),proc2) = proc - else if (dep_list(j,proc2) /= proc) then - write(psb_err_unit,*) 'PSI_DL_CHECK This should not happen!!! ',& - & j,proc2,dep_list(j,proc2),proc - endif - endif - i=i+1 - enddo outer - enddo - -end subroutine psi_i_dl_check diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index d31a8f2f..485c4806 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -47,7 +47,7 @@ ! This is the method to find out who owns a set of indices. ! In principle we could do the following: ! 1. Do an allgatherv of IDX -! 2. For each of the collected indices figure if current proces owns it +! 2. For each of the collected indices figure out if current proces owns it ! 3. Scatter the results ! 4. Loop through the answers ! This method is guaranteed to find the owner, unless an input index has @@ -101,8 +101,8 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) integer(psb_ipk_), allocatable :: tprc(:), tsmpl(:), ladj(:) integer(psb_mpk_) :: icomm, minfo, iictxt integer(psb_ipk_) :: i,n_row,n_col,err_act,ip,j,ipnt, nsampl_out,& - & nv, n_answers, nreqst, nsampl_in, locr_max, & - & nreqst_max, nadj, maxspace, mxnsin + & nv, n_answers, nqries, nsampl_in, locr_max, & + & nqries_max, nadj, maxspace, mxnsin integer(psb_lpk_) :: mglob, ih integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_), parameter :: nt=4 @@ -165,22 +165,22 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! This makes ladj allocated with size 0 if needed, as opposed to unallocated call psb_realloc(nadj,ladj,info) ! - ! Throughout the subroutine, nreqst is the number of local inquiries + ! Throughout the subroutine, nqries is the number of local inquiries ! that have not been answered yet ! - nreqst = nv - n_answers - nreqst_max = nreqst + nqries = nv - n_answers + nqries_max = nqries ! ! Choice of maxspace should be adjusted to account for a default ! "sensible" size and/or a user-specified value ! tmpv(1) = nadj - tmpv(2) = nreqst_max + tmpv(2) = nqries_max tmpv(3) = n_row tmpv(4) = psb_cd_get_maxspace() call psb_max(ictxt,tmpv) - nreqst_max = tmpv(2) + nqries_max = tmpv(2) locr_max = tmpv(3) maxspace = nt*locr_max if (tmpv(4) > 0) maxspace = min(maxspace,tmpv(4)) @@ -192,21 +192,21 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! Do a preliminary run on the user-defined adjacency lists ! if (trace.and.(me == 0)) write(0,*) ' Initial sweep on user-defined topology' - if (debugsz) write(0,*) me,' Initial sweep on user-defined topology',nreqst - nsampl_in = min(nreqst,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) + if (debugsz) write(0,*) me,' Initial sweep on user-defined topology',nqries + nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) call idxmap%xtnd_p_adjcncy(ladj) - nreqst = nv - n_answers - nreqst_max = nreqst - call psb_max(ictxt,nreqst_max) - if (trace.and.(me == 0)) write(0,*) ' After initial sweep:',nreqst_max - if (debugsz) write(0,*) me,' After sweep on user-defined topology',nreqst_max + nqries = nv - n_answers + nqries_max = nqries + call psb_max(ictxt,nqries_max) + if (trace.and.(me == 0)) write(0,*) ' After initial sweep:',nqries_max + if (debugsz) write(0,*) me,' After sweep on user-defined topology',nqries_max end if if (do_timings) call psb_toc(idx_sweep0) - fnd_owner_loop: do while (nreqst_max>0) + fnd_owner_loop: do while (nqries_max>0) if (do_timings) call psb_tic(idx_loop_a2a) - if (debugsz) write(0,*) me,' fnd_owner_loop',nreqst_max + if (debugsz) write(0,*) me,' fnd_owner_loop',nqries_max ! ! The basic idea of this loop is to alternate between ! searching through all processes and searching @@ -215,8 +215,8 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! 1. Select a sample such that the total size is <= maxspace ! sample query is then sent to all processes ! - ! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nreqst_max - nsampl_in = nreqst + ! if (trace.and.(me == 0)) write(0,*) 'Looping in graph_fnd_owner: ', nqries_max + nsampl_in = nqries nsampl_in = min(max(1,(maxspace+np-1)/np),nsampl_in) ! ! Choose a sample, should it be done in this simplistic way? @@ -236,13 +236,13 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! We might have padded when looking for owners, so the actual samples ! could be less than they appear. Should be explained better. ! - nsampl_in = min(nreqst,nsampl_in) + nsampl_in = min(nqries,nsampl_in) call psi_cpy_out(iprc,tprc,tsmpl,nsampl_in,nsampl_out) if (nsampl_out /= nsampl_in) then write(0,*) me,'Warning: indices not found by a2a_fnd_owner ',nsampl_out,nsampl_in end if n_answers = n_answers + nsampl_out - nreqst = nv - n_answers + nqries = nv - n_answers ! ! 3. Extract the resulting adjacency list and add it to the ! indxmap; @@ -259,18 +259,18 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) ! Need to set up a proper loop here to have a complete ! sweep over the input vector. Done inside adj_fnd_sweep. ! -!!$ write(0,*) me,' After a2a ',nreqst - nsampl_in = min(nreqst,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) +!!$ write(0,*) me,' After a2a ',nqries + nsampl_in = min(nqries,max(1,(maxspace+max(1,nadj)-1))/(max(1,nadj))) mxnsin = nsampl_in call psb_max(ictxt,mxnsin) !!$ write(0,*) me, ' mxnsin ',mxnsin if (mxnsin>0) call psi_adj_fnd_sweep(idx,iprc,ladj,idxmap,nsampl_in,n_answers) call idxmap%xtnd_p_adjcncy(ladj) - nreqst = nv - n_answers - nreqst_max = nreqst - call psb_max(ictxt,nreqst_max) - if (trace.and.(me == 0)) write(0,*) ' fnd_owner_loop remaining:',nreqst_max + nqries = nv - n_answers + nqries_max = nqries + call psb_max(ictxt,nqries_max) + if (trace.and.(me == 0)) write(0,*) ' fnd_owner_loop remaining:',nqries_max if (do_timings) call psb_toc(idx_loop_neigh) end do fnd_owner_loop diff --git a/base/internals/psi_list_search.f90 b/base/internals/psi_list_search.f90 deleted file mode 100644 index bb362422..00000000 --- a/base/internals/psi_list_search.f90 +++ /dev/null @@ -1,58 +0,0 @@ -! -! Parallel Sparse BLAS version 3.5 -! (C) Copyright 2006, 2010, 2015, 2017 -! Salvatore Filippone -! Alfredo Buttari CNRS-IRIT, Toulouse -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! 1. Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! 2. Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions, and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! 3. The name of the PSBLAS group or the names of its contributors may -! not be used to endorse or promote products derived from this -! software without specific written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS -! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -integer function psi_list_search(list,lenght_list,elem) - use psb_const_mod - implicit none - !returns position of elem in a array list - !of lenght lenght_list, if this element does not exist - !returns -1 - integer(psb_ipk_) :: list(*) - integer(psb_ipk_) :: lenght_list - integer(psb_ipk_) :: elem - - integer(psb_ipk_) :: i - - i=1 - do while ((i.le.lenght_list).and.(list(i).ne.elem)) - i=i+1 - enddo - if (i.le.lenght_list) then - if (list(i).eq.elem) then - psi_list_search=i - else - psi_list_search=-1 - endif - else - psi_list_search=-1 - endif -end function psi_list_search - diff --git a/base/internals/psi_sort_dl.f90 b/base/internals/psi_sort_dl.f90 index 0e7c276e..c4e3ea07 100644 --- a/base/internals/psi_sort_dl.f90 +++ b/base/internals/psi_sort_dl.f90 @@ -29,65 +29,6 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -subroutine psi_i_sort_dl(dep_list,l_dep_list,np,info) - ! - ! interface between former sort_dep_list subroutine - ! and new srtlist - ! - use psi_mod, psb_protect_name => psi_i_sort_dl - use psb_const_mod - use psb_error_mod - implicit none - - integer(psb_ipk_) :: np,dep_list(:,:), l_dep_list(:) - integer(psb_ipk_) :: idg, iupd, idgp, iedges, iidx, iich,ndgmx, isz, err_act - integer(psb_ipk_) :: i, info - integer(psb_ipk_), allocatable :: work(:) - integer(psb_ipk_) :: debug_level, debug_unit - character(len=20) :: name - - name='psi_sort_dl' - if(psb_get_errstatus() /= 0) return - info=psb_success_ - call psb_erractionsave(err_act) - debug_unit = psb_get_debug_unit() - debug_level = psb_get_debug_level() - - info = psb_success_ - ndgmx = 0 - do i=1,np - ndgmx = ndgmx + l_dep_list(i) - if (debug_level >= psb_debug_inner_)& - & write(debug_unit,*) name,': ',i,l_dep_list(i) - enddo - idg = 1 - iupd = idg+np - idgp = iupd+np - iedges = idgp + ndgmx - iidx = iedges + 2*ndgmx - iich = iidx + ndgmx - isz = iich + ndgmx - if (debug_level >= psb_debug_inner_)& - & write(debug_unit,*) name,': ndgmx ',ndgmx,isz - allocate(work(isz)) - ! call srtlist(dep_list, dl_lda, l_dep_list, np, info) - call srtlist(dep_list,size(dep_list,1,kind=psb_ipk_),l_dep_list,np,work(idg),& - & work(idgp),work(iupd),work(iedges),work(iidx),work(iich),info) - if (info /= psb_success_) then - call psb_errpush(psb_err_from_subroutine_,name,a_err='srtlist') - goto 9999 - endif - - deallocate(work) - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - - return - -end subroutine psi_i_sort_dl - !********************************************************************** ! * ! The communication step among processors at each * diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90 index 35ff1316..5eebad41 100644 --- a/base/modules/psi_i_mod.F90 +++ b/base/modules/psi_i_mod.F90 @@ -94,23 +94,7 @@ module psi_i_mod end subroutine psi_i_desc_index end interface - interface psi_dl_check - subroutine psi_i_dl_check(dep_list,dl_lda,np,length_dl) - import - implicit none - integer(psb_ipk_) :: np,dl_lda,length_dl(0:np) - integer(psb_ipk_) :: dep_list(dl_lda,0:np) - end subroutine psi_i_dl_check - end interface - interface psi_sort_dl - subroutine psi_i_sort_dl(dep_list,l_dep_list,np,info) - import - implicit none - integer(psb_ipk_) :: dep_list(:,:), l_dep_list(:) - integer(psb_ipk_) :: np - integer(psb_ipk_) :: info - end subroutine psi_i_sort_dl subroutine psi_i_csr_sort_dl(dl_ptr,c_dep_list,l_dep_list,ictxt,info) import implicit none From 76ff6c824c34a2d997f35caf34814aac3b8867ed Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 3 Jun 2020 20:01:36 +0200 Subject: [PATCH 8/8] Fix GEN_BLOCK distribution search for owner process: binary search on repeated keys --- base/modules/desc/psb_gen_block_map_mod.F90 | 62 ++++++++++++--------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index b53713a5..4bb29294 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -2192,16 +2192,7 @@ contains integer(psb_ipk_) :: lb, ub, m - if (n < 5) then - ! don't bother with binary search for very - ! small vectors - ipos = 0 - do - if (ipos == n) return - if (key < v(ipos+1)) return - ipos = ipos + 1 - end do - else + choice: if (n >5) then lb = 1 ub = n ipos = -1 @@ -2210,7 +2201,7 @@ contains m = (lb+ub)/2 if (key==v(m)) then ipos = m - return + exit choice else if (key < v(m)) then ub = m-1 else @@ -2220,8 +2211,21 @@ contains if (v(ub) > key) then ub = ub - 1 end if - ipos = ub - endif + ipos = ub + else + ! No binary search, do everything in the final cleanup + ipos = 0 + end if choice + + ! Final cleanup + ! This is needed because V may contain repeated entries + ! i.e. there may be processes that own 0 indices + do + if (ipos == n) exit + if (key < v(ipos+1) ) exit + ipos = ipos + 1 + end do + return end function i_gen_block_search @@ -2234,17 +2238,8 @@ contains integer(psb_lpk_) :: v(:) integer(psb_ipk_) :: lb, ub, m - - if (n < 5) then - ! don't bother with binary search for very - ! small vectors - ipos = 0 - do - if (ipos == n) return - if (key < v(ipos+1)) return - ipos = ipos + 1 - end do - else + + choice: if (n >5) then lb = 1 ub = n ipos = -1 @@ -2253,7 +2248,7 @@ contains m = (lb+ub)/2 if (key==v(m)) then ipos = m - return + exit choice else if (key < v(m)) then ub = m-1 else @@ -2263,8 +2258,21 @@ contains if (v(ub) > key) then ub = ub - 1 end if - ipos = ub - endif + ipos = ub + else + ! No binary search, do everything in the final cleanup + ipos = 0 + end if choice + + ! Final cleanup + ! This is needed because V may contain repeated entries + ! i.e. there may be processes that own 0 indices + do + if (ipos == n) exit + if (key < v(ipos+1) ) exit + ipos = ipos + 1 + end do + return end function l_gen_block_search #endif