From a2788bdf0ba1a4b6e4bf3bb447b49da76d4db931 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 7 Nov 2023 13:39:44 +0100 Subject: [PATCH] New version with ND product --- base/psblas/psb_cspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 44 ++++++++++++++++++++++++++----------- base/psblas/psb_sspmm.f90 | 23 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 23 ++++++++++++++----- base/tools/psb_cspasb.f90 | 2 +- base/tools/psb_sspasb.f90 | 2 +- base/tools/psb_zspasb.f90 | 2 +- test/pargen/psb_d_pde3d.F90 | 4 ++-- test/pargen/runs/ppde.inp | 2 +- 9 files changed, 88 insertions(+), 37 deletions(-) diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index 84d8a7d8..25a6bc56 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,cone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index d5897f82..7888188a 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -180,22 +180,40 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,done,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) t2= psb_wtime() + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) t3= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2 + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - - end if if(info /= psb_success_) then info = psb_err_from_subroutine_non_ diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 7c1e0ab3..cf8919f0 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,sone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index 4dc73f83..629fcf2b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -180,12 +180,23 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& ! Matrix is not transposed if (allocated(a%ad)) then - if (doswap_) call psi_swapdata(psb_swap_send_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%ad%spmm(alpha,x%v,beta,y%v,info) - if (doswap_) call psi_swapdata(psb_swap_recv_,& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) - call a%and%spmm(alpha,x%v,zone,y%v,info) + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + if (do_timings) t1= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_send_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t2= psb_wtime() + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) t3= psb_wtime() + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) t4= psb_wtime() + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) t5= psb_wtime() + if (do_timings) write(0,*) me,' SPMM:',t2-t1,t3-t2,t4-t3,t5-t4 + end block else if (doswap_) then diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 46258139..8263e309 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_c_coo_sparse_mat) :: acoo type(psb_c_csr_sparse_mat), allocatable :: aclip type(psb_c_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index 0edae30e..f273c7f4 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat), allocatable :: aclip type(psb_s_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index cd77de15..1a381303 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -183,7 +183,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) type(psb_z_coo_sparse_mat) :: acoo type(psb_z_csr_sparse_mat), allocatable :: aclip type(psb_z_ecsr_sparse_mat), allocatable :: andclip - logical, parameter :: use_ecsr=.false. + logical, parameter :: use_ecsr=.true. allocate(aclip) call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) allocate(a%ad,mold=a%a) diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90 index cd503d29..e802736e 100644 --- a/test/pargen/psb_d_pde3d.F90 +++ b/test/pargen/psb_d_pde3d.F90 @@ -680,9 +680,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=.true.) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=.false.) else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.true.) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=.false.) end if end if call psb_barrier(ctxt) diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index c70a973f..44dac085 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -5,7 +5,7 @@ CSR Storage format for matrix A: CSR COO 200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 -0300 MAXIT +0008 MAXIT 10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH