From 73e5d499131c086b77edca6c94538fc2ecda17a0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 2 Jun 2023 11:37:58 +0200 Subject: [PATCH] Added timers to build phases --- .../aggregator/amg_c_dec_aggregator_tprol.f90 | 10 +++++ .../impl/aggregator/amg_caggrmat_smth_bld.f90 | 38 ++++++++++++++++--- .../aggregator/amg_d_dec_aggregator_tprol.f90 | 10 +++++ .../impl/aggregator/amg_daggrmat_smth_bld.f90 | 38 ++++++++++++++++--- .../aggregator/amg_s_dec_aggregator_tprol.f90 | 10 +++++ .../impl/aggregator/amg_saggrmat_smth_bld.f90 | 38 ++++++++++++++++--- .../aggregator/amg_z_dec_aggregator_tprol.f90 | 10 +++++ .../impl/aggregator/amg_zaggrmat_smth_bld.f90 | 38 ++++++++++++++++--- .../impl/level/amg_c_base_onelev_mat_asb.f90 | 16 +++++++- .../impl/level/amg_d_base_onelev_mat_asb.f90 | 16 +++++++- .../impl/level/amg_s_base_onelev_mat_asb.f90 | 16 +++++++- .../impl/level/amg_z_base_onelev_mat_asb.f90 | 16 +++++++- 12 files changed, 224 insertions(+), 32 deletions(-) diff --git a/amgprec/impl/aggregator/amg_c_dec_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_c_dec_aggregator_tprol.f90 index 4efaf61d..250cc549 100644 --- a/amgprec/impl/aggregator/amg_c_dec_aggregator_tprol.f90 +++ b/amgprec/impl/aggregator/amg_c_dec_aggregator_tprol.f90 @@ -97,6 +97,8 @@ subroutine amg_c_dec_aggregator_build_tprol(ag,parms,ag_data,& integer(psb_lpk_) :: ntaggr integer(psb_ipk_) :: debug_level, debug_unit logical :: clean_zeros + integer(psb_ipk_), save :: idx_map_bld=-1, idx_map_tprol=-1 + logical, parameter :: do_timings=.false. name='amg_c_dec_aggregator_tprol' call psb_erractionsave(err_act) @@ -108,6 +110,10 @@ subroutine amg_c_dec_aggregator_build_tprol(ag,parms,ag_data,& info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_map_bld==-1)) & + & idx_map_bld = psb_get_timer_idx("DEC_TPROL: map_bld") + if ((do_timings).and.(idx_map_tprol==-1)) & + & idx_map_tprol = psb_get_timer_idx("DEC_TPROL: map_tprol") call amg_check_def(parms%ml_cycle,'Multilevel cycle',& & amg_mult_ml_,is_legal_ml_cycle) @@ -121,10 +127,14 @@ subroutine amg_c_dec_aggregator_build_tprol(ag,parms,ag_data,& ! The decoupled aggregator based on SOC measures ignores ! ag_data except for clean_zeros; soc_map_bld is a procedure pointer. ! + if (do_timings) call psb_tic(idx_map_bld) clean_zeros = ag%do_clean_zeros call ag%soc_map_bld(parms%aggr_ord,parms%aggr_thresh,clean_zeros,a,desc_a,nlaggr,ilaggr,info) + if (do_timings) call psb_toc(idx_map_bld) + if (do_timings) call psb_tic(idx_map_tprol) if (info==psb_success_) call amg_map_to_tprol(desc_a,ilaggr,nlaggr,t_prol,info) + if (do_timings) call psb_toc(idx_map_tprol) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='soc_map_bld/map_to_tprol') diff --git a/amgprec/impl/aggregator/amg_caggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_caggrmat_smth_bld.f90 index 53e740fe..c4a85b05 100644 --- a/amgprec/impl/aggregator/amg_caggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_caggrmat_smth_bld.f90 @@ -140,6 +140,9 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& real(psb_spk_) :: anorm, omega, tmp, dg, theta logical, parameter :: debug_new=.false. character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 + integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 name='amg_aggrmat_smth_bld' info=psb_success_ @@ -153,6 +156,23 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ctxt = desc_a%get_context() call psb_info(ctxt, me, np) + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("DEC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("DEC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("DEC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("DEC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("DEC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("DEC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("DEC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("DEC_SMTH_BLD: ptap_bld ") + nglob = desc_a%get_global_rows() nrow = desc_a%get_local_rows() @@ -171,6 +191,7 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! naggr: number of local aggregates ! nrow: local rows. ! + if (do_timings) call psb_tic(idx_phase1) ! Get the diagonal D adiag = a%get_diag(info) @@ -196,7 +217,7 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! ! Build the filtered matrix Af from A ! - + !$OMP parallel do private(i,j,tmp,jd) schedule(static) do i=1, nrow tmp = czero jd = -1 @@ -214,11 +235,13 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& acsrf%val(jd)=acsrf%val(jd)-tmp end if enddo + !$OMP end parallel do ! Take out zeroed terms call acsrf%clean_zeros(info) end if + !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= czero) then adiag(i) = cone / adiag(i) @@ -226,7 +249,7 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = cone end if end do - + !$OMP end parallel do if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -252,8 +275,9 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') goto 9999 end if + if (do_timings) call psb_toc(idx_phase1) - + if (do_timings) call psb_tic(idx_phase2) call acsrf%scal(adiag,info) if (info /= psb_success_) goto 9999 @@ -267,6 +291,8 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_cdasb(desc_ac,info) call psb_cd_reinit(desc_ac,info) + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) ! ! Build the smoothed prolongator using either A or Af ! acsr1 = (I-w*D*A) Prol acsr1 = (I-w*D*Af) Prol @@ -279,8 +305,8 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') goto 9999 end if - - + if (do_timings) call psb_toc(idx_phase3) + if (do_timings) call psb_tic(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done SPSPMM 1' @@ -292,7 +318,7 @@ subroutine amg_caggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call op_prol%mv_from(coo_prol) call op_restr%mv_from(coo_restr) - + if (do_timings) call psb_toc(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' diff --git a/amgprec/impl/aggregator/amg_d_dec_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_d_dec_aggregator_tprol.f90 index 2edcca6c..26edbb0a 100644 --- a/amgprec/impl/aggregator/amg_d_dec_aggregator_tprol.f90 +++ b/amgprec/impl/aggregator/amg_d_dec_aggregator_tprol.f90 @@ -97,6 +97,8 @@ subroutine amg_d_dec_aggregator_build_tprol(ag,parms,ag_data,& integer(psb_lpk_) :: ntaggr integer(psb_ipk_) :: debug_level, debug_unit logical :: clean_zeros + integer(psb_ipk_), save :: idx_map_bld=-1, idx_map_tprol=-1 + logical, parameter :: do_timings=.false. name='amg_d_dec_aggregator_tprol' call psb_erractionsave(err_act) @@ -108,6 +110,10 @@ subroutine amg_d_dec_aggregator_build_tprol(ag,parms,ag_data,& info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_map_bld==-1)) & + & idx_map_bld = psb_get_timer_idx("DEC_TPROL: map_bld") + if ((do_timings).and.(idx_map_tprol==-1)) & + & idx_map_tprol = psb_get_timer_idx("DEC_TPROL: map_tprol") call amg_check_def(parms%ml_cycle,'Multilevel cycle',& & amg_mult_ml_,is_legal_ml_cycle) @@ -121,10 +127,14 @@ subroutine amg_d_dec_aggregator_build_tprol(ag,parms,ag_data,& ! The decoupled aggregator based on SOC measures ignores ! ag_data except for clean_zeros; soc_map_bld is a procedure pointer. ! + if (do_timings) call psb_tic(idx_map_bld) clean_zeros = ag%do_clean_zeros call ag%soc_map_bld(parms%aggr_ord,parms%aggr_thresh,clean_zeros,a,desc_a,nlaggr,ilaggr,info) + if (do_timings) call psb_toc(idx_map_bld) + if (do_timings) call psb_tic(idx_map_tprol) if (info==psb_success_) call amg_map_to_tprol(desc_a,ilaggr,nlaggr,t_prol,info) + if (do_timings) call psb_toc(idx_map_tprol) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='soc_map_bld/map_to_tprol') diff --git a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 index 82da3fc7..d365bf27 100644 --- a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 @@ -140,6 +140,9 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& real(psb_dpk_) :: anorm, omega, tmp, dg, theta logical, parameter :: debug_new=.false. character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 + integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 name='amg_aggrmat_smth_bld' info=psb_success_ @@ -153,6 +156,23 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ctxt = desc_a%get_context() call psb_info(ctxt, me, np) + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("DEC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("DEC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("DEC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("DEC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("DEC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("DEC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("DEC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("DEC_SMTH_BLD: ptap_bld ") + nglob = desc_a%get_global_rows() nrow = desc_a%get_local_rows() @@ -171,6 +191,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! naggr: number of local aggregates ! nrow: local rows. ! + if (do_timings) call psb_tic(idx_phase1) ! Get the diagonal D adiag = a%get_diag(info) @@ -196,7 +217,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! ! Build the filtered matrix Af from A ! - + !$OMP parallel do private(i,j,tmp,jd) schedule(static) do i=1, nrow tmp = dzero jd = -1 @@ -214,11 +235,13 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& acsrf%val(jd)=acsrf%val(jd)-tmp end if enddo + !$OMP end parallel do ! Take out zeroed terms call acsrf%clean_zeros(info) end if + !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= dzero) then adiag(i) = done / adiag(i) @@ -226,7 +249,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = done end if end do - + !$OMP end parallel do if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -252,8 +275,9 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') goto 9999 end if + if (do_timings) call psb_toc(idx_phase1) - + if (do_timings) call psb_tic(idx_phase2) call acsrf%scal(adiag,info) if (info /= psb_success_) goto 9999 @@ -267,6 +291,8 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_cdasb(desc_ac,info) call psb_cd_reinit(desc_ac,info) + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) ! ! Build the smoothed prolongator using either A or Af ! acsr1 = (I-w*D*A) Prol acsr1 = (I-w*D*Af) Prol @@ -279,8 +305,8 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') goto 9999 end if - - + if (do_timings) call psb_toc(idx_phase3) + if (do_timings) call psb_tic(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done SPSPMM 1' @@ -292,7 +318,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call op_prol%mv_from(coo_prol) call op_restr%mv_from(coo_restr) - + if (do_timings) call psb_toc(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' diff --git a/amgprec/impl/aggregator/amg_s_dec_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_s_dec_aggregator_tprol.f90 index c52c04f7..9529d141 100644 --- a/amgprec/impl/aggregator/amg_s_dec_aggregator_tprol.f90 +++ b/amgprec/impl/aggregator/amg_s_dec_aggregator_tprol.f90 @@ -97,6 +97,8 @@ subroutine amg_s_dec_aggregator_build_tprol(ag,parms,ag_data,& integer(psb_lpk_) :: ntaggr integer(psb_ipk_) :: debug_level, debug_unit logical :: clean_zeros + integer(psb_ipk_), save :: idx_map_bld=-1, idx_map_tprol=-1 + logical, parameter :: do_timings=.false. name='amg_s_dec_aggregator_tprol' call psb_erractionsave(err_act) @@ -108,6 +110,10 @@ subroutine amg_s_dec_aggregator_build_tprol(ag,parms,ag_data,& info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_map_bld==-1)) & + & idx_map_bld = psb_get_timer_idx("DEC_TPROL: map_bld") + if ((do_timings).and.(idx_map_tprol==-1)) & + & idx_map_tprol = psb_get_timer_idx("DEC_TPROL: map_tprol") call amg_check_def(parms%ml_cycle,'Multilevel cycle',& & amg_mult_ml_,is_legal_ml_cycle) @@ -121,10 +127,14 @@ subroutine amg_s_dec_aggregator_build_tprol(ag,parms,ag_data,& ! The decoupled aggregator based on SOC measures ignores ! ag_data except for clean_zeros; soc_map_bld is a procedure pointer. ! + if (do_timings) call psb_tic(idx_map_bld) clean_zeros = ag%do_clean_zeros call ag%soc_map_bld(parms%aggr_ord,parms%aggr_thresh,clean_zeros,a,desc_a,nlaggr,ilaggr,info) + if (do_timings) call psb_toc(idx_map_bld) + if (do_timings) call psb_tic(idx_map_tprol) if (info==psb_success_) call amg_map_to_tprol(desc_a,ilaggr,nlaggr,t_prol,info) + if (do_timings) call psb_toc(idx_map_tprol) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='soc_map_bld/map_to_tprol') diff --git a/amgprec/impl/aggregator/amg_saggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_saggrmat_smth_bld.f90 index d96176b2..c2eae3a4 100644 --- a/amgprec/impl/aggregator/amg_saggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_saggrmat_smth_bld.f90 @@ -140,6 +140,9 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& real(psb_spk_) :: anorm, omega, tmp, dg, theta logical, parameter :: debug_new=.false. character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 + integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 name='amg_aggrmat_smth_bld' info=psb_success_ @@ -153,6 +156,23 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ctxt = desc_a%get_context() call psb_info(ctxt, me, np) + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("DEC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("DEC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("DEC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("DEC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("DEC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("DEC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("DEC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("DEC_SMTH_BLD: ptap_bld ") + nglob = desc_a%get_global_rows() nrow = desc_a%get_local_rows() @@ -171,6 +191,7 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! naggr: number of local aggregates ! nrow: local rows. ! + if (do_timings) call psb_tic(idx_phase1) ! Get the diagonal D adiag = a%get_diag(info) @@ -196,7 +217,7 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! ! Build the filtered matrix Af from A ! - + !$OMP parallel do private(i,j,tmp,jd) schedule(static) do i=1, nrow tmp = szero jd = -1 @@ -214,11 +235,13 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& acsrf%val(jd)=acsrf%val(jd)-tmp end if enddo + !$OMP end parallel do ! Take out zeroed terms call acsrf%clean_zeros(info) end if + !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= szero) then adiag(i) = sone / adiag(i) @@ -226,7 +249,7 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = sone end if end do - + !$OMP end parallel do if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -252,8 +275,9 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') goto 9999 end if + if (do_timings) call psb_toc(idx_phase1) - + if (do_timings) call psb_tic(idx_phase2) call acsrf%scal(adiag,info) if (info /= psb_success_) goto 9999 @@ -267,6 +291,8 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_cdasb(desc_ac,info) call psb_cd_reinit(desc_ac,info) + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) ! ! Build the smoothed prolongator using either A or Af ! acsr1 = (I-w*D*A) Prol acsr1 = (I-w*D*Af) Prol @@ -279,8 +305,8 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') goto 9999 end if - - + if (do_timings) call psb_toc(idx_phase3) + if (do_timings) call psb_tic(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done SPSPMM 1' @@ -292,7 +318,7 @@ subroutine amg_saggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call op_prol%mv_from(coo_prol) call op_restr%mv_from(coo_restr) - + if (do_timings) call psb_toc(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' diff --git a/amgprec/impl/aggregator/amg_z_dec_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_z_dec_aggregator_tprol.f90 index a64e3ebb..a6a7856e 100644 --- a/amgprec/impl/aggregator/amg_z_dec_aggregator_tprol.f90 +++ b/amgprec/impl/aggregator/amg_z_dec_aggregator_tprol.f90 @@ -97,6 +97,8 @@ subroutine amg_z_dec_aggregator_build_tprol(ag,parms,ag_data,& integer(psb_lpk_) :: ntaggr integer(psb_ipk_) :: debug_level, debug_unit logical :: clean_zeros + integer(psb_ipk_), save :: idx_map_bld=-1, idx_map_tprol=-1 + logical, parameter :: do_timings=.false. name='amg_z_dec_aggregator_tprol' call psb_erractionsave(err_act) @@ -108,6 +110,10 @@ subroutine amg_z_dec_aggregator_build_tprol(ag,parms,ag_data,& info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_map_bld==-1)) & + & idx_map_bld = psb_get_timer_idx("DEC_TPROL: map_bld") + if ((do_timings).and.(idx_map_tprol==-1)) & + & idx_map_tprol = psb_get_timer_idx("DEC_TPROL: map_tprol") call amg_check_def(parms%ml_cycle,'Multilevel cycle',& & amg_mult_ml_,is_legal_ml_cycle) @@ -121,10 +127,14 @@ subroutine amg_z_dec_aggregator_build_tprol(ag,parms,ag_data,& ! The decoupled aggregator based on SOC measures ignores ! ag_data except for clean_zeros; soc_map_bld is a procedure pointer. ! + if (do_timings) call psb_tic(idx_map_bld) clean_zeros = ag%do_clean_zeros call ag%soc_map_bld(parms%aggr_ord,parms%aggr_thresh,clean_zeros,a,desc_a,nlaggr,ilaggr,info) + if (do_timings) call psb_toc(idx_map_bld) + if (do_timings) call psb_tic(idx_map_tprol) if (info==psb_success_) call amg_map_to_tprol(desc_a,ilaggr,nlaggr,t_prol,info) + if (do_timings) call psb_toc(idx_map_tprol) if (info /= psb_success_) then info=psb_err_from_subroutine_ call psb_errpush(info,name,a_err='soc_map_bld/map_to_tprol') diff --git a/amgprec/impl/aggregator/amg_zaggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_zaggrmat_smth_bld.f90 index 2f944699..7b8ed075 100644 --- a/amgprec/impl/aggregator/amg_zaggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_zaggrmat_smth_bld.f90 @@ -140,6 +140,9 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& real(psb_dpk_) :: anorm, omega, tmp, dg, theta logical, parameter :: debug_new=.false. character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 + integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 name='amg_aggrmat_smth_bld' info=psb_success_ @@ -153,6 +156,23 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ctxt = desc_a%get_context() call psb_info(ctxt, me, np) + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("DEC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("DEC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("DEC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("DEC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("DEC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("DEC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("DEC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("DEC_SMTH_BLD: ptap_bld ") + nglob = desc_a%get_global_rows() nrow = desc_a%get_local_rows() @@ -171,6 +191,7 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! naggr: number of local aggregates ! nrow: local rows. ! + if (do_timings) call psb_tic(idx_phase1) ! Get the diagonal D adiag = a%get_diag(info) @@ -196,7 +217,7 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& ! ! Build the filtered matrix Af from A ! - + !$OMP parallel do private(i,j,tmp,jd) schedule(static) do i=1, nrow tmp = zzero jd = -1 @@ -214,11 +235,13 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& acsrf%val(jd)=acsrf%val(jd)-tmp end if enddo + !$OMP end parallel do ! Take out zeroed terms call acsrf%clean_zeros(info) end if + !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= zzero) then adiag(i) = zone / adiag(i) @@ -226,7 +249,7 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = zone end if end do - + !$OMP end parallel do if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -252,8 +275,9 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') goto 9999 end if + if (do_timings) call psb_toc(idx_phase1) - + if (do_timings) call psb_tic(idx_phase2) call acsrf%scal(adiag,info) if (info /= psb_success_) goto 9999 @@ -267,6 +291,8 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_cdasb(desc_ac,info) call psb_cd_reinit(desc_ac,info) + if (do_timings) call psb_toc(idx_phase2) + if (do_timings) call psb_tic(idx_phase3) ! ! Build the smoothed prolongator using either A or Af ! acsr1 = (I-w*D*A) Prol acsr1 = (I-w*D*Af) Prol @@ -279,8 +305,8 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') goto 9999 end if - - + if (do_timings) call psb_toc(idx_phase3) + if (do_timings) call psb_tic(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done SPSPMM 1' @@ -292,7 +318,7 @@ subroutine amg_zaggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call op_prol%mv_from(coo_prol) call op_restr%mv_from(coo_restr) - + if (do_timings) call psb_toc(idx_ptap) if (debug_level >= psb_debug_outer_) & & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' diff --git a/amgprec/impl/level/amg_c_base_onelev_mat_asb.f90 b/amgprec/impl/level/amg_c_base_onelev_mat_asb.f90 index e79c90c9..27896806 100644 --- a/amgprec/impl/level/amg_c_base_onelev_mat_asb.f90 +++ b/amgprec/impl/level/amg_c_base_onelev_mat_asb.f90 @@ -109,6 +109,8 @@ subroutine amg_c_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) type(psb_cspmat_type) :: ac, op_restr, op_prol integer(psb_ipk_) :: nzl, inl integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_), save :: idx_matbld=-1, idx_matasb=-1, idx_mapbld=-1 + logical, parameter :: do_timings=.false. name='amg_c_onelev_mat_asb' call psb_erractionsave(err_act) @@ -120,6 +122,12 @@ subroutine amg_c_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_matbld==-1)) & + & idx_matbld = psb_get_timer_idx("LEV_MASB: mat_bld") + if ((do_timings).and.(idx_matasb==-1)) & + & idx_matasb = psb_get_timer_idx("LEV_MASB: mat_asb") + if ((do_timings).and.(idx_mapbld==-1)) & + & idx_mapbld = psb_get_timer_idx("LEV_MASB: map_bld") call amg_check_def(lv%parms%aggr_prol,'Smoother',& & amg_smooth_prol_,is_legal_ml_aggr_prol) @@ -139,9 +147,10 @@ subroutine amg_c_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! the mapping defined by amg_aggrmap_bld and applying the aggregation ! algorithm specified by lv%iprcparm(amg_aggr_prol_) ! + if (do_timings) call psb_tic(idx_matbld) call lv%aggr%mat_bld(lv%parms,a,desc_a,ilaggr,nlaggr,& & lv%ac,lv%desc_ac,op_prol,op_restr,t_prol,info) - + if (do_timings) call psb_toc(idx_matbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_aggrmat_asb') goto 9999 @@ -151,14 +160,17 @@ subroutine amg_c_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! Now build its descriptor and convert global indices for ! ac, op_restr and op_prol ! + if (do_timings) call psb_tic(idx_matasb) if (info == psb_success_) & & call lv%aggr%mat_asb(lv%parms,a,desc_a,& & lv%ac,lv%desc_ac,op_prol,op_restr,info) - + if (do_timings) call psb_toc(idx_matasb) + if (do_timings) call psb_tic(idx_mapbld) if (info == psb_success_) call lv%ac%cscnv(info,type='csr',dupl=psb_dupl_add_) if (info == psb_success_) call lv%aggr%bld_map(desc_a, lv%desc_ac,& & ilaggr,nlaggr,op_restr,op_prol,lv%linmap,info) + if (do_timings) call psb_toc(idx_mapbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mat_asb/map_bld') goto 9999 diff --git a/amgprec/impl/level/amg_d_base_onelev_mat_asb.f90 b/amgprec/impl/level/amg_d_base_onelev_mat_asb.f90 index e9e55a9a..6bd4e1ac 100644 --- a/amgprec/impl/level/amg_d_base_onelev_mat_asb.f90 +++ b/amgprec/impl/level/amg_d_base_onelev_mat_asb.f90 @@ -109,6 +109,8 @@ subroutine amg_d_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) type(psb_dspmat_type) :: ac, op_restr, op_prol integer(psb_ipk_) :: nzl, inl integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_), save :: idx_matbld=-1, idx_matasb=-1, idx_mapbld=-1 + logical, parameter :: do_timings=.false. name='amg_d_onelev_mat_asb' call psb_erractionsave(err_act) @@ -120,6 +122,12 @@ subroutine amg_d_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_matbld==-1)) & + & idx_matbld = psb_get_timer_idx("LEV_MASB: mat_bld") + if ((do_timings).and.(idx_matasb==-1)) & + & idx_matasb = psb_get_timer_idx("LEV_MASB: mat_asb") + if ((do_timings).and.(idx_mapbld==-1)) & + & idx_mapbld = psb_get_timer_idx("LEV_MASB: map_bld") call amg_check_def(lv%parms%aggr_prol,'Smoother',& & amg_smooth_prol_,is_legal_ml_aggr_prol) @@ -139,9 +147,10 @@ subroutine amg_d_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! the mapping defined by amg_aggrmap_bld and applying the aggregation ! algorithm specified by lv%iprcparm(amg_aggr_prol_) ! + if (do_timings) call psb_tic(idx_matbld) call lv%aggr%mat_bld(lv%parms,a,desc_a,ilaggr,nlaggr,& & lv%ac,lv%desc_ac,op_prol,op_restr,t_prol,info) - + if (do_timings) call psb_toc(idx_matbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_aggrmat_asb') goto 9999 @@ -151,14 +160,17 @@ subroutine amg_d_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! Now build its descriptor and convert global indices for ! ac, op_restr and op_prol ! + if (do_timings) call psb_tic(idx_matasb) if (info == psb_success_) & & call lv%aggr%mat_asb(lv%parms,a,desc_a,& & lv%ac,lv%desc_ac,op_prol,op_restr,info) - + if (do_timings) call psb_toc(idx_matasb) + if (do_timings) call psb_tic(idx_mapbld) if (info == psb_success_) call lv%ac%cscnv(info,type='csr',dupl=psb_dupl_add_) if (info == psb_success_) call lv%aggr%bld_map(desc_a, lv%desc_ac,& & ilaggr,nlaggr,op_restr,op_prol,lv%linmap,info) + if (do_timings) call psb_toc(idx_mapbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mat_asb/map_bld') goto 9999 diff --git a/amgprec/impl/level/amg_s_base_onelev_mat_asb.f90 b/amgprec/impl/level/amg_s_base_onelev_mat_asb.f90 index 271b31d0..034151d3 100644 --- a/amgprec/impl/level/amg_s_base_onelev_mat_asb.f90 +++ b/amgprec/impl/level/amg_s_base_onelev_mat_asb.f90 @@ -109,6 +109,8 @@ subroutine amg_s_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) type(psb_sspmat_type) :: ac, op_restr, op_prol integer(psb_ipk_) :: nzl, inl integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_), save :: idx_matbld=-1, idx_matasb=-1, idx_mapbld=-1 + logical, parameter :: do_timings=.false. name='amg_s_onelev_mat_asb' call psb_erractionsave(err_act) @@ -120,6 +122,12 @@ subroutine amg_s_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_matbld==-1)) & + & idx_matbld = psb_get_timer_idx("LEV_MASB: mat_bld") + if ((do_timings).and.(idx_matasb==-1)) & + & idx_matasb = psb_get_timer_idx("LEV_MASB: mat_asb") + if ((do_timings).and.(idx_mapbld==-1)) & + & idx_mapbld = psb_get_timer_idx("LEV_MASB: map_bld") call amg_check_def(lv%parms%aggr_prol,'Smoother',& & amg_smooth_prol_,is_legal_ml_aggr_prol) @@ -139,9 +147,10 @@ subroutine amg_s_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! the mapping defined by amg_aggrmap_bld and applying the aggregation ! algorithm specified by lv%iprcparm(amg_aggr_prol_) ! + if (do_timings) call psb_tic(idx_matbld) call lv%aggr%mat_bld(lv%parms,a,desc_a,ilaggr,nlaggr,& & lv%ac,lv%desc_ac,op_prol,op_restr,t_prol,info) - + if (do_timings) call psb_toc(idx_matbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_aggrmat_asb') goto 9999 @@ -151,14 +160,17 @@ subroutine amg_s_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! Now build its descriptor and convert global indices for ! ac, op_restr and op_prol ! + if (do_timings) call psb_tic(idx_matasb) if (info == psb_success_) & & call lv%aggr%mat_asb(lv%parms,a,desc_a,& & lv%ac,lv%desc_ac,op_prol,op_restr,info) - + if (do_timings) call psb_toc(idx_matasb) + if (do_timings) call psb_tic(idx_mapbld) if (info == psb_success_) call lv%ac%cscnv(info,type='csr',dupl=psb_dupl_add_) if (info == psb_success_) call lv%aggr%bld_map(desc_a, lv%desc_ac,& & ilaggr,nlaggr,op_restr,op_prol,lv%linmap,info) + if (do_timings) call psb_toc(idx_mapbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mat_asb/map_bld') goto 9999 diff --git a/amgprec/impl/level/amg_z_base_onelev_mat_asb.f90 b/amgprec/impl/level/amg_z_base_onelev_mat_asb.f90 index 07ab3e0b..eb11cad2 100644 --- a/amgprec/impl/level/amg_z_base_onelev_mat_asb.f90 +++ b/amgprec/impl/level/amg_z_base_onelev_mat_asb.f90 @@ -109,6 +109,8 @@ subroutine amg_z_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) type(psb_zspmat_type) :: ac, op_restr, op_prol integer(psb_ipk_) :: nzl, inl integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_), save :: idx_matbld=-1, idx_matasb=-1, idx_mapbld=-1 + logical, parameter :: do_timings=.false. name='amg_z_onelev_mat_asb' call psb_erractionsave(err_act) @@ -120,6 +122,12 @@ subroutine amg_z_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) info = psb_success_ ctxt = desc_a%get_context() call psb_info(ctxt,me,np) + if ((do_timings).and.(idx_matbld==-1)) & + & idx_matbld = psb_get_timer_idx("LEV_MASB: mat_bld") + if ((do_timings).and.(idx_matasb==-1)) & + & idx_matasb = psb_get_timer_idx("LEV_MASB: mat_asb") + if ((do_timings).and.(idx_mapbld==-1)) & + & idx_mapbld = psb_get_timer_idx("LEV_MASB: map_bld") call amg_check_def(lv%parms%aggr_prol,'Smoother',& & amg_smooth_prol_,is_legal_ml_aggr_prol) @@ -139,9 +147,10 @@ subroutine amg_z_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! the mapping defined by amg_aggrmap_bld and applying the aggregation ! algorithm specified by lv%iprcparm(amg_aggr_prol_) ! + if (do_timings) call psb_tic(idx_matbld) call lv%aggr%mat_bld(lv%parms,a,desc_a,ilaggr,nlaggr,& & lv%ac,lv%desc_ac,op_prol,op_restr,t_prol,info) - + if (do_timings) call psb_toc(idx_matbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_aggrmat_asb') goto 9999 @@ -151,14 +160,17 @@ subroutine amg_z_base_onelev_mat_asb(lv,a,desc_a,ilaggr,nlaggr,t_prol,info) ! Now build its descriptor and convert global indices for ! ac, op_restr and op_prol ! + if (do_timings) call psb_tic(idx_matasb) if (info == psb_success_) & & call lv%aggr%mat_asb(lv%parms,a,desc_a,& & lv%ac,lv%desc_ac,op_prol,op_restr,info) - + if (do_timings) call psb_toc(idx_matasb) + if (do_timings) call psb_tic(idx_mapbld) if (info == psb_success_) call lv%ac%cscnv(info,type='csr',dupl=psb_dupl_add_) if (info == psb_success_) call lv%aggr%bld_map(desc_a, lv%desc_ac,& & ilaggr,nlaggr,op_restr,op_prol,lv%linmap,info) + if (do_timings) call psb_toc(idx_mapbld) if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='mat_asb/map_bld') goto 9999