From 0302f5692d3d976c578be4e9ecf6cc348f4f5692 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 26 Jun 2020 14:31:55 +0200 Subject: [PATCH] Benchmark version --- base/internals/psi_bld_glb_dep_list.F90 | 38 ++++++++++++++++++----- base/internals/psi_crea_index.f90 | 40 ++++++++++++++++++++----- base/internals/psi_graph_fnd_owner.F90 | 2 +- base/tools/psb_d_glob_transpose.F90 | 20 +++++++++++-- 4 files changed, 81 insertions(+), 19 deletions(-) diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90 index bf28e49b..29e1a41e 100644 --- a/base/internals/psi_bld_glb_dep_list.F90 +++ b/base/internals/psi_bld_glb_dep_list.F90 @@ -147,7 +147,7 @@ subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,i integer(psb_ipk_) :: err integer(psb_ipk_) :: debug_level, debug_unit integer(psb_mpk_) :: iictxt, icomm, me, np, minfo - logical, parameter :: dist_symm_list=.false., print_dl=.false. + logical, parameter :: dist_symm_list=.false., print_dl=.true. character name*20 name='psi_bld_glb_csr_dep_list' @@ -189,14 +189,38 @@ subroutine psi_i_bld_glb_csr_dep_list(ictxt,loc_dl,length_dl,c_dep_list,dl_ptr,i info=psb_err_internal_error_ goto 9999 endif - dl_ptr = dl_ptr + 1 + dl_ptr = dl_ptr + 1 + if (print_dl) then if (me == 0) then - write(0,*) ' Dep_list ' - do i=0,np-1 - write(0,*) 'Proc ',i,':',c_dep_list(dl_ptr(i):dl_ptr(i+1)-1) - end do - flush(0) + block + character(len=80) :: fname, frmt + integer :: ni, nl,ldl, lname, iout = 87 + ldl = dl_ptr(np) + ni = floor(log10(1.0*np)) + 1 + nl = floor(log10(1.0*ldl)) + 1 + write(frmt,'(a,i3.3,a,i3.3,a)') '(a,i',ni,'.',ni,')' + write(fname,frmt) 'dep_list_p_',np + lname = len_trim(fname) + write(frmt,'(a,i3.3,a,i3.3,a)') '(a,i',nl,'.',nl,')' + write(fname(lname+1:lname+nl+3),frmt) '_l_',ldl + fname = trim(fname)//'.mtx' + open(iout,file=fname) + if (.true.) then + write(iout,*) np, np, ldl-1 + do i=0,np-1 + do j=dl_ptr(i),dl_ptr(i+1)-1 + write(iout,*) i+1,c_dep_list(j)+1 + end do + end do + else + write(iout,*) ' Dep_list ' + do i=0,np-1 + write(iout,*) 'Proc ',i,':',c_dep_list(dl_ptr(i):dl_ptr(i+1)-1) + end do + end if + close(iout) + end block end if call psb_barrier(ictxt) end if diff --git a/base/internals/psi_crea_index.f90 b/base/internals/psi_crea_index.f90 index 2b0a8321..f97144f8 100644 --- a/base/internals/psi_crea_index.f90 +++ b/base/internals/psi_crea_index.f90 @@ -71,9 +71,9 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) integer(psb_ipk_),parameter :: root=psb_root_,no_comm=-1 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name - logical, parameter :: do_timings=.false. + logical, parameter :: do_timings=.true., shuffle_dep_list=.true. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 - integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 + integer(psb_ipk_), save :: idx_phase21=-1, idx_phase22=-1, idx_phase13=-1 info = psb_success_ name='psi_crea_index' @@ -95,10 +95,10 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) & idx_phase2 = psb_get_timer_idx("PSI_CREA_INDEX: phase2") if ((do_timings).and.(idx_phase3==-1)) & & idx_phase3 = psb_get_timer_idx("PSI_CREA_INDEX: phase3") -!!$ if ((do_timings).and.(idx_phase11==-1)) & -!!$ & idx_phase11 = psb_get_timer_idx("PSI_CREA_INDEX: phase11 ") -!!$ if ((do_timings).and.(idx_phase12==-1)) & -!!$ & idx_phase12 = psb_get_timer_idx("PSI_CREA_INDEX: phase12") + if ((do_timings).and.(idx_phase21==-1)) & + & idx_phase21 = psb_get_timer_idx("PSI_CREA_INDEX: phase21 ") + if ((do_timings).and.(idx_phase22==-1)) & + & idx_phase22 = psb_get_timer_idx("PSI_CREA_INDEX: phase22") !!$ if ((do_timings).and.(idx_phase13==-1)) & !!$ & idx_phase13 = psb_get_timer_idx("PSI_CREA_INDEX: phase13") @@ -123,6 +123,7 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) if (do_timings) call psb_tic(idx_phase2) if (choose_sorting(dlmax,dlavg,np)) then + if (do_timings) call psb_tic(idx_phase21) call psi_bld_glb_dep_list(ictxt,& & loc_dl,length_dl,c_dep_list,dl_ptr,info) if (info /= 0) then @@ -131,13 +132,15 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) !!$ call psi_dl_check(dep_list,dl_lda,np,length_dl) !!$ !!$ ! ....now i can sort dependency lists. + if (do_timings) call psb_toc(idx_phase21) + if (do_timings) call psb_tic(idx_phase22) call psi_sort_dl(dl_ptr,c_dep_list,length_dl,ictxt,info) if (info /= 0) then write(0,*) me,trim(name),' From sort_dl ',info end if ldl = length_dl(me) loc_dl = c_dep_list(dl_ptr(me):dl_ptr(me)+ldl-1) - + if (do_timings) call psb_toc(idx_phase22) !!$ if(info /= psb_success_) then !!$ call psb_errpush(psb_err_from_subroutine_,name,a_err='psi_sort_dl') !!$ goto 9999 @@ -146,7 +149,26 @@ subroutine psi_i_crea_index(desc_a,index_in,index_out,nxch,nsnd,nrcv,info) else ! Do nothing ldl = length_dl(me) - loc_dl = loc_dl(1:ldl) + loc_dl = loc_dl(1:ldl) + if (shuffle_dep_list) then + ! + ! Apply a random shuffle to the dependency list + ! should improve the behaviour + ! + block + ! Algorithm 3.4.2P from TAOCP vol 2. + integer(psb_ipk_) :: tmp + integer :: j,k + real :: u + do j=ldl,2,-1 + call random_number(u) + k = min(j,floor(j*u)+1) + tmp = loc_dl(k) + loc_dl(k) = loc_dl(j) + loc_dl(j) = tmp + end do + end block + end if end if if (do_timings) call psb_toc(idx_phase2) @@ -189,7 +211,9 @@ contains logical :: val val = .not.(((dlmax>(26*4)).or.((dlavg>=(26*2)).and.(np>=128)))) + val = (dlmax<16) !val = .true. + val = .false. end function choose_sorting end subroutine psi_i_crea_index diff --git a/base/internals/psi_graph_fnd_owner.F90 b/base/internals/psi_graph_fnd_owner.F90 index 485c4806..148eb19f 100644 --- a/base/internals/psi_graph_fnd_owner.F90 +++ b/base/internals/psi_graph_fnd_owner.F90 @@ -107,7 +107,7 @@ subroutine psi_graph_fnd_owner(idx,iprc,idxmap,info) integer(psb_ipk_) :: ictxt,np,me, nresp integer(psb_ipk_), parameter :: nt=4 integer(psb_ipk_) :: tmpv(4) - logical, parameter :: do_timings=.false., trace=.false., debugsz=.false. + logical, parameter :: do_timings=.true., trace=.false., debugsz=.false. integer(psb_ipk_), save :: idx_sweep0=-1, idx_loop_a2a=-1, idx_loop_neigh=-1 real(psb_dpk_) :: t0, t1, t2, t3, t4 character(len=20) :: name diff --git a/base/tools/psb_d_glob_transpose.F90 b/base/tools/psb_d_glob_transpose.F90 index 638ba174..469df2a8 100644 --- a/base/tools/psb_d_glob_transpose.F90 +++ b/base/tools/psb_d_glob_transpose.F90 @@ -423,6 +423,8 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) character(len=5) :: outfmt_ integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 if(psb_get_errstatus() /= 0) return info=psb_success_ @@ -441,7 +443,15 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),': Start' - + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("GLB_TRANSP: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("GLB_TRANSP: phase2") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("GLB_TRANSP: phase3") + + if (do_timings) call psb_tic(idx_phase1) + if (present(desc_c)) then p_desc_c => desc_c else @@ -525,7 +535,9 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) iszr = sum(rvsz) iszs = sum(sdsz) - + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_phase2) + if (debug_level >= psb_debug_outer_)& & write(debug_unit,*) me,' ',trim(name),': Sizes:',& & ' Send:',sdsz(:),' Receive:',rvsz(:) @@ -619,6 +631,8 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) if (debug_level >= psb_debug_outer_)& & write(debug_unit,*) me,' ',trim(name),': Done alltoallv' + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) if (present(desc_rx)) then ! @@ -686,7 +700,7 @@ subroutine psb_d_coo_glob_transpose(ain,desc_r,info,atrans,desc_c,desc_rx) & iarcv,jarcv,stat=info) if (debug_level >= psb_debug_outer_)& & write(debug_unit,*) me,' ',trim(name),': Done' - + if (do_timings) call psb_toc(idx_phase3) call psb_erractionrestore(err_act) return