From d231efe35ab4c137e424bbb4a082e7004760b11f Mon Sep 17 00:00:00 2001 From: Stack-1 Date: Tue, 12 May 2026 12:32:03 +0200 Subject: [PATCH] [FIX] Modified spmv test to check overlap --- base/psblas/psb_dspmm.f90 | 10 ++++---- base/tools/psb_dspasb.f90 | 39 +------------------------------- test/comm/spmv/psb_spmv_test.f90 | 18 +++++++-------- 3 files changed, 16 insertions(+), 51 deletions(-) diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index 10702422..4a18227d 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -176,7 +176,11 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_tic(mv_phase1) - if (doswap_) call psi_swapdata(psb_comm_status_start_, dzero, x%v, desc_a, info, data=psb_comm_halo_) + if (doswap_) then + call psi_swapdata(psb_comm_status_start_, dzero, x%v, desc_a, info, data=psb_comm_halo_) + else + call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_) + end if if (do_timings) call psb_toc(mv_phase1) if (do_timings) call psb_tic(mv_phase2) call a%ad%spmm(alpha,x%v,beta,y%v,info) @@ -195,9 +199,7 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_tic(mv_phase11) - if (doswap_) then - call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_) - end if + call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_) if (do_timings) call psb_toc(mv_phase11) if (do_timings) call psb_tic(mv_phase12) call psb_csmm(alpha,a,x,beta,y,info) diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 61f20a39..7f00d8bc 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -184,44 +184,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, dupl, bld_and) end if if (bld_and_) then -!!$ allocate(a%ad,mold=a%a) -!!$ allocate(a%and,mold=a%a)o -!!$ call a%split_nd(n_row,n_col,info) -!!$ block -!!$ character(len=1024) :: fname -!!$ type(psb_d_coo_sparse_mat) :: acoo -!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip -!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip -!!$ logical, parameter :: use_ecsr=.true. -!!$ allocate(aclip) -!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) -!!$ allocate(a%ad,mold=a%a) -!!$ call a%ad%mv_from_coo(acoo,info) -!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) -!!$ if (use_ecsr) then -!!$ allocate(andclip) -!!$ call andclip%mv_from_coo(acoo,info) -!!$ call move_alloc(andclip,a%and) -!!$ else -!!$ allocate(a%and,mold=a%a) -!!$ call a%and%mv_from_coo(acoo,info) -!!$ end if -!!$ if (.false.) then -!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' -!!$ open(25,file=fname) -!!$ call a%ad%print(25) -!!$ close(25) -!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' -!!$ open(25,file=fname) -!!$ call a%and%print(25) -!!$ close(25) -!!$ !call andclip%set_cols(n_col) -!!$ write(*,*) me,' ',trim(name),' ad ',& -!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col -!!$ write(*,*) me,' ',trim(name),' and ',& -!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col -!!$ end if -!!$ end block + call a%split_nd(n_row,n_col,info) else if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%and)) deallocate(a%and) diff --git a/test/comm/spmv/psb_spmv_test.f90 b/test/comm/spmv/psb_spmv_test.f90 index 4e20b0b4..396cde35 100644 --- a/test/comm/spmv/psb_spmv_test.f90 +++ b/test/comm/spmv/psb_spmv_test.f90 @@ -554,7 +554,7 @@ contains character(len=8) :: afmt character(len=64) :: env_buf integer(psb_ipk_) :: my_rank, np, info, err_act - integer(psb_ipk_) :: idim, times, i, n_global + integer(psb_ipk_) :: idim, times, i integer :: env_len, env_status, ios real(psb_dpk_) :: alpha, beta, t0, t1, dt, avg_t logical :: use_external_matrix @@ -629,10 +629,8 @@ contains call psb_barrier(ctxt) if (use_external_matrix) then call load_external_matrix(ctxt, matrix_file, matrix_fmt, a, y, x, desc_a, afmt, info) - n_global = int(a%get_nrows(),kind=psb_ipk_) else call psb_d_gen_pde3d(ctxt,idim,a,y,x,desc_a,afmt,info) - n_global = idim * idim * idim end if if (info /= psb_success_) goto 9999 @@ -693,7 +691,9 @@ contains else write(psb_out_unit,'(" idim : ",i0)') idim end if - write(psb_out_unit,'(" global unknowns : ",i0)') n_global + write(psb_out_unit,'(" global non zeros : ",i0)') a%get_nzeros() + write(psb_out_unit,'(" global rows : ",i0)') a%get_nrows() + write(psb_out_unit,'(" global cols : ",i0)') a%get_ncols() write(psb_out_unit,'(" repetitions : ",i0)') times write(psb_out_unit,'(" comm backend : ",a)') trim(psb_toupper(trim(comm_mode))) write(psb_out_unit,'(" total time [s] : ",es12.5)') dt @@ -790,7 +790,7 @@ program psb_spmv_kernel character(len=8) :: cpu_fmt character(len=8) :: gpu_fmt integer(psb_ipk_) :: idim_arg, times_arg - logical :: do_swap + logical :: do_overlap integer :: kmode integer, parameter :: n_comm_modes = 5 character(len=20), parameter :: comm_modes(n_comm_modes) = [character(len=20) :: & @@ -803,7 +803,7 @@ program psb_spmv_kernel matrix_fmt = 'MM' cpu_fmt = 'CSR' gpu_fmt = 'HLG' - do_swap = .true. + do_overlap = .true. call psb_init(ctxt) call psb_info(ctxt, my_rank, np) @@ -849,9 +849,9 @@ program psb_spmv_kernel else if (index(psb_toupper(trim(arg)), '--GPU_FMT=') == 1) then gpu_fmt = psb_toupper(adjustl(arg(11:len_trim(arg)))) else if ((trim(psb_toupper(arg)) == '--NOOVERLAP') .or. (trim(psb_toupper(arg)) == '--NO_OVERLAP')) then - do_swap = .false. + do_overlap = .false. else if ((trim(psb_toupper(arg)) == '--OVERLAP') .or. (trim(psb_toupper(arg)) == '--SWAP')) then - do_swap = .true. + do_overlap = .true. else if (trim(psb_toupper(arg)) == '--MATRIX') then if (k < command_argument_count()) call get_command_argument(k+1,matrix_file) else if (trim(psb_toupper(arg)) == '--FMT') then @@ -911,7 +911,7 @@ program psb_spmv_kernel write(psb_out_unit,'(/,"=== Backend sweep: ",a," ===")') trim(comm_modes(kmode)) end if call run_spmv_kernel(ctxt, use_gpu, matrix_file, matrix_fmt, cpu_fmt, gpu_fmt, & - & idim_arg, times_arg, do_swap, comm_modes(kmode)) + & idim_arg, times_arg, do_overlap, comm_modes(kmode)) end do #ifdef PSB_HAVE_CUDA