From 00cc83cde8d03e85539ee06fbb3873ab80357a4f Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 18 Jan 2023 05:04:16 -0500 Subject: [PATCH] First version of AD/AND with memory duplication --- base/modules/serial/psb_c_mat_mod.F90 | 1 + base/modules/serial/psb_d_mat_mod.F90 | 1 + base/modules/serial/psb_s_mat_mod.F90 | 1 + base/modules/serial/psb_z_mat_mod.F90 | 1 + base/psblas/psb_cspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_dspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_sspmm.f90 | 21 ++++++++++++++----- base/psblas/psb_zspmm.f90 | 21 ++++++++++++++----- base/tools/psb_cspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_dspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_sspasb.f90 | 30 ++++++++++++++++++++++++++- base/tools/psb_zspasb.f90 | 30 ++++++++++++++++++++++++++- compile | 0 test/pargen/runs/ppde.inp | 2 +- 14 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 compile diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index fd423de3..2e365858 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_c_mat_mod type :: psb_cspmat_type class(psb_c_base_sparse_mat), allocatable :: a + class(psb_c_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lc_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..49a9545e 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_d_mat_mod type :: psb_dspmat_type class(psb_d_base_sparse_mat), allocatable :: a + class(psb_d_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ld_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 43f1c619..eb444249 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_s_mat_mod type :: psb_sspmat_type class(psb_s_base_sparse_mat), allocatable :: a + class(psb_s_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ls_coo_sparse_mat), allocatable :: rmta diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index c534cad5..e70e48aa 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -85,6 +85,7 @@ module psb_z_mat_mod type :: psb_zspmat_type class(psb_z_base_sparse_mat), allocatable :: a + class(psb_z_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lz_coo_sparse_mat), allocatable :: rmta diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index fd8a9c39..555461df 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,cone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..be8a493f 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,done,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 43ee0d48..79bfbdd1 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,sone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index b58ca303..f248db8b 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -179,13 +179,24 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + if (.true.) then + call psi_swapdata(psb_swap_send_,& & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + call a%and%spmm(alpha,x%v,zone,y%v,info) + + else + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + + call psb_csmm(alpha,a,x,beta,y,info) + end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 0c5f14ab..ea7789f2 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 3132f249..89ceef8d 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index cfa316eb..14ad5246 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index aeeef94d..f65be363 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -171,7 +171,35 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - + if (.true.) then + block + character(len=1024) :: fname + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip, andclip + allocate(aclip,andclip) + call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) + call aclip%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) + call andclip%mv_from_coo(acoo,info) + call move_alloc(aclip,a%ad) + call move_alloc(andclip,a%and) + if (.false.) then + write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' + open(25,file=fname) + call a%ad%print(25) + close(25) + write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' + open(25,file=fname) + call a%and%print(25) + close(25) + !call andclip%set_cols(n_col) + write(*,*) me,' ',trim(name),' ad ',& + &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col + write(*,*) me,' ',trim(name),' and ',& + &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col + end if + end block + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/compile b/compile new file mode 100644 index 00000000..e69de29b diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index e7e5dca2..5f040075 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,7 +2,7 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -040 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0100 MAXIT