From e83bde6896431443f2d9f89c9ef1fbf0522996eb Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Apr 2024 14:45:20 +0200 Subject: [PATCH] New timings --- amgprec/amg_base_prec_type.F90 | 3 + amgprec/amg_d_matchboxp_mod.f90 | 6 +- .../amg_d_parmatch_aggregator_tprol.F90 | 2 +- .../amg_d_parmatch_spmm_bld_inner.F90 | 2 +- amgprec/impl/aggregator/amg_d_ptap_bld.f90 | 2 +- .../impl/aggregator/amg_d_soc2_map_bld.F90 | 2 +- amgprec/impl/amg_dmlprec_aply.f90 | 34 +++++++++- .../amg_d_poly_smoother_apply_vect.f90 | 66 ++++++++++++++++++- .../impl/smoother/amg_d_poly_smoother_bld.f90 | 6 +- .../smoother/amg_d_poly_smoother_cseti.f90 | 2 +- 10 files changed, 112 insertions(+), 13 deletions(-) diff --git a/amgprec/amg_base_prec_type.F90 b/amgprec/amg_base_prec_type.F90 index 3434d675..60aacaec 100644 --- a/amgprec/amg_base_prec_type.F90 +++ b/amgprec/amg_base_prec_type.F90 @@ -326,6 +326,7 @@ module amg_base_prec_type integer(psb_ipk_), parameter :: amg_poly_lottes_ = 0 integer(psb_ipk_), parameter :: amg_poly_lottes_beta_ = 1 integer(psb_ipk_), parameter :: amg_poly_new_ = 2 + integer(psb_ipk_), parameter :: amg_poly_dbg_ = 8 integer(psb_ipk_), parameter :: amg_poly_rho_est_power_ = 0 @@ -575,6 +576,8 @@ contains val = amg_poly_lottes_beta_ case('POLY_NEW') val = amg_poly_new_ + case('POLY_DBG') + val = amg_poly_dbg_ case('POLY_RHO_EST_POWER') val = amg_poly_rho_est_power_ case('A_NORMI') diff --git a/amgprec/amg_d_matchboxp_mod.f90 b/amgprec/amg_d_matchboxp_mod.f90 index e19ce617..8b9c3bb6 100644 --- a/amgprec/amg_d_matchboxp_mod.f90 +++ b/amgprec/amg_d_matchboxp_mod.f90 @@ -145,7 +145,7 @@ contains logical, parameter :: dump=.false., debug=.false., dump_mate=.false., & & debug_ilaggr=.false., debug_sync=.false., debug_mate=.false. integer(psb_ipk_), save :: idx_bldmtc=-1, idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. integer, parameter :: ilaggr_neginit=-1, ilaggr_nonlocal=-2 ictxt = desc_a%get_ctxt() @@ -608,7 +608,7 @@ contains logical, parameter :: old_style=.false., sort_minp=.true. character(len=40) :: name='build_matching', fname integer(psb_ipk_), save :: idx_cmboxp=-1, idx_bldahat=-1, idx_phase2=-1, idx_phase3=-1 - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. ictxt = desc_a%get_ctxt() call psb_info(ictxt,iam,np) @@ -810,7 +810,7 @@ contains character(len=80) :: aname real(psb_dpk_), parameter :: eps=epsilon(1.d0) integer(psb_ipk_), save :: idx_glbt=-1, idx_phase1=-1, idx_phase2=-1 - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. logical, parameter :: debug_symmetry = .false., check_size=.false. logical, parameter :: unroll_logtrans=.false. diff --git a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.F90 b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.F90 index f23869b7..3187aa70 100644 --- a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.F90 +++ b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.F90 @@ -88,7 +88,7 @@ subroutine amg_d_parmatch_aggregator_build_tprol(ag,parms,ag_data,& type(psb_ldspmat_type) :: tmp_prol, tmp_pg, tmp_restr type(psb_desc_type) :: tmp_desc_ac, tmp_desc_ax, tmp_desc_p integer(psb_ipk_), save :: idx_mboxp=-1, idx_spmmbld=-1, idx_sweeps_mult=-1 - logical, parameter :: dump=.false., do_timings=.true., debug=.false., & + logical, parameter :: dump=.false., do_timings=.false., debug=.false., & & dump_prol_restr=.false. name='d_parmatch_tprol' diff --git a/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.F90 b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.F90 index 04d89b2f..aa60fe20 100644 --- a/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.F90 +++ b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.F90 @@ -131,7 +131,7 @@ subroutine amg_d_parmatch_spmm_bld_inner(a_csr,desc_a,ilaggr,nlaggr,parms,& & nzt, naggrm1, naggrp1, i, k integer(psb_lpk_), allocatable :: ia(:),ja(:) !integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza, nrpsave, ncpsave, nzpsave - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_spspmm=-1, idx_prolcnv=-1, idx_proltrans=-1, idx_asb=-1 name='amg_parmatch_spmm_bld_inner' diff --git a/amgprec/impl/aggregator/amg_d_ptap_bld.f90 b/amgprec/impl/aggregator/amg_d_ptap_bld.f90 index 4006c04c..4ca7d444 100644 --- a/amgprec/impl/aggregator/amg_d_ptap_bld.f90 +++ b/amgprec/impl/aggregator/amg_d_ptap_bld.f90 @@ -486,7 +486,7 @@ subroutine amg_ld_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,& integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nrl, nzl, ip, & & nzt, naggrm1, naggrp1, i, k integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_spspmm=-1 name='amg_ptap_bld' diff --git a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 index 345cd1ad..82972fda 100644 --- a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 @@ -104,7 +104,7 @@ subroutine amg_d_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in character(len=20) :: name, ch_err integer(psb_ipk_), save :: idx_soc2_p1=-1, idx_soc2_p2=-1, idx_soc2_p3=-1 integer(psb_ipk_), save :: idx_soc2_p0=-1 - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. info=psb_success_ name = 'amg_soc2_map_bld' diff --git a/amgprec/impl/amg_dmlprec_aply.f90 b/amgprec/impl/amg_dmlprec_aply.f90 index 983fb937..33a64c31 100644 --- a/amgprec/impl/amg_dmlprec_aply.f90 +++ b/amgprec/impl/amg_dmlprec_aply.f90 @@ -591,6 +591,8 @@ contains integer(psb_ipk_) :: nlev, ilev, sweeps logical :: pre, post character(len=20) :: name + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: ml_mlt_smth=-1, ml_mlt_rp=-1, ml_mlt_rsd=-1 name = 'inner_inner_mult' info = psb_success_ @@ -608,6 +610,12 @@ contains if(debug_level > 1) then write(debug_unit,*) me,' inner_mult at level ',level end if + if ((do_timings).and.(ml_mlt_smth==-1)) & + & ml_mlt_smth = psb_get_timer_idx("ML-MLT: smoother ") + if ((do_timings).and.(ml_mlt_rp==-1)) & + & ml_mlt_rp = psb_get_timer_idx("ML-MLT: RestProl") + if ((do_timings).and.(ml_mlt_rsd==-1)) & + & ml_mlt_rsd = psb_get_timer_idx("ML-MLT: Residual") sweeps_post = p%precv(level)%parms%sweeps_post sweeps_pre = p%precv(level)%parms%sweeps_pre @@ -623,7 +631,7 @@ contains ! Apply the first smoother ! The residual has been prepared before the recursive call. ! - + if (do_timings) call psb_tic(ml_mlt_smth) if (pre) then if (me >=0) then !!$ write(0,*) me,'Applying smoother pre ', level @@ -646,10 +654,13 @@ contains end if end if endif + if (do_timings) call psb_toc(ml_mlt_smth) + ! ! Compute the residual for next level and call recursively ! if (pre) then + if (do_timings) call psb_tic(ml_mlt_rsd) call psb_geaxpby(done,vx2l,& & dzero,vty,& & base_desc,info) @@ -657,6 +668,9 @@ contains if (info == psb_success_) call psb_spmm(-done,base_a,& & vy2l,done,vty,& & base_desc,info,work=work,trans=trans) + if (do_timings) call psb_toc(ml_mlt_rsd) + if (do_timings) call psb_tic(ml_mlt_rp) + if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,& & a_err='Error during residue') @@ -671,7 +685,9 @@ contains & a_err='Error during restriction') goto 9999 end if + if (do_timings) call psb_toc(ml_mlt_rp) else + if (do_timings) call psb_tic(ml_mlt_rp) ! Shortcut: just transfer x2l. call p%precv(level+1)%map_rstr(done,vx2l,& & dzero,p%precv(level+1)%wrk%vx2l,& @@ -682,6 +698,7 @@ contains & a_err='Error during restriction') goto 9999 end if + if (do_timings) call psb_toc(ml_mlt_rp) endif call inner_ml_aply(level+1,p,trans,work,info) @@ -689,10 +706,12 @@ contains ! ! Apply the prolongator ! + if (do_timings) call psb_tic(ml_mlt_rp) call p%precv(level+1)%map_prol(done,& & p%precv(level+1)%wrk%vy2l,done,vy2l,& & info,work=work,& & vtx=p%precv(level+1)%wrk%wv(1),vty=wv(1)) + if (do_timings) call psb_toc(ml_mlt_rp) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,& & a_err='Error during prolongation') @@ -700,7 +719,7 @@ contains end if if (p%precv(level)%parms%ml_cycle == amg_wcycle_ml_) then - + if (do_timings) call psb_tic(ml_mlt_rsd) if (me >=0) then call psb_geaxpby(done,vx2l, dzero,vty,& & base_desc,info) @@ -708,10 +727,13 @@ contains & vy2l,done,vty,& & base_desc,info,work=work,trans=trans) end if + if (do_timings) call psb_toc(ml_mlt_rsd) + if (do_timings) call psb_tic(ml_mlt_rp) if (info == psb_success_) & & call p%precv(level+1)%map_rstr(done,vty,& & dzero,p%precv(level+1)%wrk%vx2l,info,work=work,& & vtx=wv(1),vty=p%precv(level+1)%wrk%wv(1)) + if (do_timings) call psb_toc(ml_mlt_rp) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,& & a_err='Error during W-cycle restriction') @@ -720,10 +742,12 @@ contains call inner_ml_aply(level+1,p,trans,work,info) + if (do_timings) call psb_tic(ml_mlt_rp) if (info == psb_success_) call p%precv(level+1)%map_prol(done, & & p%precv(level+1)%wrk%vy2l,done,vy2l,& & info,work=work,& & vtx=p%precv(level+1)%wrk%wv(1),vty=wv(1)) + if (do_timings) call psb_toc(ml_mlt_rp) if (info /= psb_success_) then call psb_errpush(psb_err_internal_error_,name,& @@ -736,6 +760,7 @@ contains if (post) then if (me >=0) then + if (do_timings) call psb_tic(ml_mlt_rsd) call psb_geaxpby(done,vx2l,& & dzero,vty,& & base_desc,info) @@ -747,7 +772,9 @@ contains & a_err='Error during residue') goto 9999 end if + if (do_timings) call psb_toc(ml_mlt_rsd) + if (do_timings) call psb_tic(ml_mlt_smth) ! ! Apply the second smoother ! @@ -762,6 +789,7 @@ contains & vty,done,vy2l, base_desc, trans,& & sweeps,work,wv,info,init='Z') end if + if (do_timings) call psb_toc(ml_mlt_smth) end if if (info /= psb_success_) then @@ -774,12 +802,14 @@ contains else if (level == nlev) then !!$ write(0,*) me,'Applying smoother at top level ',psb_errstatus_fatal() + if (do_timings) call psb_tic(ml_mlt_smth) if (me >=0) then sweeps = p%precv(level)%parms%sweeps_pre if (info == psb_success_) call p%precv(level)%sm%apply(done,& & vx2l,dzero,vy2l,base_desc, trans,& & sweeps,work,wv,info) end if + if (do_timings) call psb_toc(ml_mlt_smth) !!$ write(0,*) me,' Done applying smoother at top level ',psb_errstatus_fatal() else diff --git a/amgprec/impl/smoother/amg_d_poly_smoother_apply_vect.f90 b/amgprec/impl/smoother/amg_d_poly_smoother_apply_vect.f90 index 32926bd6..ac89c523 100644 --- a/amgprec/impl/smoother/amg_d_poly_smoother_apply_vect.f90 +++ b/amgprec/impl/smoother/amg_d_poly_smoother_apply_vect.f90 @@ -64,6 +64,9 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& character :: trans_, init_ real(psb_dpk_) :: res, resdenum character(len=20) :: name='d_poly_smoother_apply_v' + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: poly_1=-1, poly_2=-1, poly_3=-1 + integer(psb_ipk_), save :: poly_mv=-1, poly_sv=-1, poly_vect=-1 call psb_erractionsave(err_act) @@ -93,6 +96,18 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& goto 9999 end if + if ((do_timings).and.(poly_1==-1)) & + & poly_1 = psb_get_timer_idx("POLY: Chebychev4") + if ((do_timings).and.(poly_2==-1)) & + & poly_2 = psb_get_timer_idx("POLY: OptChebychev4") + if ((do_timings).and.(poly_3==-1)) & + & poly_3 = psb_get_timer_idx("POLY: OptChebychev1") + if ((do_timings).and.(poly_mv==-1)) & + & poly_mv = psb_get_timer_idx("POLY: spMV") + if ((do_timings).and.(poly_vect==-1)) & + & poly_vect = psb_get_timer_idx("POLY: Vectors") + if ((do_timings).and.(poly_sv==-1)) & + & poly_sv = psb_get_timer_idx("POLY: solver") n_row = desc_data%get_local_rows() n_col = desc_data%get_local_cols() @@ -125,6 +140,7 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& select case(sm%variant) case(amg_poly_lottes_) + if (do_timings) call psb_tic(poly_1) block real(psb_dpk_) :: cz, cr ! b == x @@ -154,8 +170,10 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& ! x_k = x_{k-1} + z_k end do end block + if (do_timings) call psb_toc(poly_1) case(amg_poly_lottes_beta_) + if (do_timings) call psb_tic(poly_2) block real(psb_dpk_) :: cz, cr @@ -194,34 +212,51 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& ! x_k = x_{k-1} + z_k end do end block + if (do_timings) call psb_toc(poly_2) case(amg_poly_new_) + if (do_timings) call psb_tic(poly_3) + block real(psb_dpk_) :: sigma, theta, delta, rho_old, rho ! b == x ! x == tx ! - + sm%rho_ba = 1.12d0 + !write(0,*) 'Parameter: ',sm%cf_a,sm%rho_ba + theta = (done+sm%cf_a)/2 delta = (done-sm%cf_a)/2 sigma = theta/delta rho_old = done/sigma + if (do_timings) call psb_tic(poly_sv) call sm%sv%apply(done,r,dzero,ty,desc_data,trans_,aux,wv(5:),info,init='Z') + if (do_timings) call psb_toc(poly_sv) + if (do_timings) call psb_tic(poly_vect) call psb_geaxpby((done/sm%rho_ba),ty,dzero,r,desc_data,info) + !write(0,*) 'POLY_NEW Iteration',0,' :',psb_genrm2(r,desc_data,info) if (.false.) then call psb_geaxpby((done/theta),r,dzero,tz,desc_data,info) call psb_geaxpby(done,tz,done,tx,desc_data,info) else call psb_abgdxyz((done/theta),dzero,done,done,r,tz,tx,desc_data,info) end if + if (do_timings) call psb_toc(poly_vect) ! tz == d do i=1, sm%pdegree-1 ! ! ! r_{k-1} = r_k - (1/rho(BA)) B A d_k + if (do_timings) call psb_tic(poly_mv) call psb_spmm(done,sm%pa,tz,dzero,ty,desc_data,info,work=aux,trans=trans_) + if (do_timings) call psb_toc(poly_mv) + if (do_timings) call psb_tic(poly_sv) call sm%sv%apply(-(done/sm%rho_ba),ty,done,r,desc_data,trans_,aux,wv(5:),info,init='Z') + if (do_timings) call psb_toc(poly_sv) + if (do_timings) call psb_tic(poly_vect) + + !write(0,*) 'POLY_NEW Iteration',i,' :',psb_genrm2(r,desc_data,info) ! ! d_{k+1} = (rho rho_old) d_k + 2(rho/delta) r_{k+1} @@ -236,9 +271,36 @@ subroutine amg_d_poly_smoother_apply_vect(alpha,sm,x,beta,y,desc_data,trans,& !!$ write(0,*) 'Polynomial smoother NEW ',i,res ! x_k = x_{k-1} + z_k rho_old = rho + if (do_timings) call psb_toc(poly_vect) + end do + end block + if (do_timings) call psb_toc(poly_3) + + case(amg_poly_dbg_) + block + real(psb_dpk_) :: sigma, theta, delta, rho_old, rho + ! b == x + ! x == tx + ! + write(0,*) 'Parameter: ',sm%cf_a + theta = (done+sm%cf_a)/2 + delta = (done-sm%cf_a)/2 + sigma = theta/delta + rho_old = done/sigma + call sm%sv%apply(done,r,dzero,ty,desc_data,trans_,aux,wv(5:),info,init='Z') + call psb_geaxpby(done,ty,dzero,r,desc_data,info) + call psb_geaxpby(done/theta,r,dzero,tz,desc_data,info) + write(0,*) 'POLY_DBG Iteration',0,' :',psb_genrm2(r,desc_data,info) + do i=1, sm%pdegree + call psb_geaxpby(done,tz,done,tx,desc_data,info) + call psb_spmm(done,sm%pa,tz,dzero,ty,desc_data,info,work=aux,trans=trans_) + call sm%sv%apply(-(done),ty,done,r,desc_data,trans_,aux,wv(5:),info,init='Z') + write(0,*) 'POLY_DBG Iteration',i,' :',psb_genrm2(r,desc_data,info) + rho = done/(2*sigma - rho_old) + call psb_geaxpby((2*rho/delta),r,rho*rho_old,tz,desc_data,info) + rho_old = rho end do end block - case default info=psb_err_internal_error_ diff --git a/amgprec/impl/smoother/amg_d_poly_smoother_bld.f90 b/amgprec/impl/smoother/amg_d_poly_smoother_bld.f90 index dd156912..77b1aa3d 100644 --- a/amgprec/impl/smoother/amg_d_poly_smoother_bld.f90 +++ b/amgprec/impl/smoother/amg_d_poly_smoother_bld.f90 @@ -87,10 +87,14 @@ subroutine amg_d_poly_smoother_bld(a,desc_a,sm,info,amold,vmold,imold) & a_err='invalid sm%degree for poly_beta') goto 9999 end if - case(amg_poly_new_) + case(amg_poly_new_, amg_poly_dbg_) if ((1<=sm%pdegree).and.(sm%pdegree<=30)) then !Ok +!!$ write(0,*) 'Vector: ' +!!$ do i=1,size(amg_d_poly_a_vect) +!!$ write(0,*) i,amg_d_poly_a_vect(i) +!!$ end do sm%cf_a = amg_d_poly_a_vect(sm%pdegree) else info = psb_err_internal_error_ diff --git a/amgprec/impl/smoother/amg_d_poly_smoother_cseti.f90 b/amgprec/impl/smoother/amg_d_poly_smoother_cseti.f90 index 916fb5e6..5d8d1169 100644 --- a/amgprec/impl/smoother/amg_d_poly_smoother_cseti.f90 +++ b/amgprec/impl/smoother/amg_d_poly_smoother_cseti.f90 @@ -58,7 +58,7 @@ subroutine amg_d_poly_smoother_cseti(sm,what,val,info,idx) sm%pdegree = val case('POLY_VARIANT') select case(val) - case(amg_poly_lottes_,amg_poly_lottes_beta_,amg_poly_new_) + case(amg_poly_lottes_,amg_poly_lottes_beta_,amg_poly_new_,amg_poly_dbg_) sm%variant = val case default write(0,*) 'Invalid choice for POLY_VARIANT, defaulting to amg_poly_lottes_',val