From 52ad95461d7b2dd8f7de4e386428bab001ddbc01 Mon Sep 17 00:00:00 2001 From: Stack-1 Date: Thu, 23 Apr 2026 14:43:23 +0200 Subject: [PATCH] [UPDATE] Updated communicationtest to read also external matrices --- .gitignore | 1 + test/comm/cg/README.md | 10 +- test/comm/cg/psb_comm_cg_test.F90 | 144 ++++++++++++++- test/comm/spmv/README.md | 10 ++ test/comm/spmv/psb_spmv_test.f90 | 255 +++++++++++++++++++++++---- test/comm/swapdata/README.md | 10 ++ test/comm/swapdata/psb_comm_test.F90 | 133 ++++++++++---- test/pdegen/psb_d_pde3d.F90 | 8 - 8 files changed, 487 insertions(+), 84 deletions(-) diff --git a/.gitignore b/.gitignore index f4ad68fb..6775ecfb 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,5 @@ autom4te.cache # the executable from tests runs +data diff --git a/test/comm/cg/README.md b/test/comm/cg/README.md index 76a85e1e..0f09dad5 100644 --- a/test/comm/cg/README.md +++ b/test/comm/cg/README.md @@ -34,4 +34,12 @@ From this directory: - `make run` (defaults: `NP=4`, `IDIM=40`) - `make run NP=8 IDIM=80` -The program accepts one optional CLI argument: `IDIM`. +Default PDE-generated matrix: + +- `./runs/psb_comm_cg_test [idim] [nrep] [nwarm] [itmax] [--gpu=TRUE|FALSE]` + +External matrix input: + +- `./runs/psb_comm_cg_test [idim] [nrep] [nwarm] [itmax] --matrix= [--fmt=MM|HB] [--gpu=TRUE|FALSE]` + +When `--matrix` is provided, the test reads and distributes that matrix and ignores the PDE generator. diff --git a/test/comm/cg/psb_comm_cg_test.F90 b/test/comm/cg/psb_comm_cg_test.F90 index baf1bab6..357d0048 100644 --- a/test/comm/cg/psb_comm_cg_test.F90 +++ b/test/comm/cg/psb_comm_cg_test.F90 @@ -1,5 +1,6 @@ program psb_comm_cg_test use psb_base_mod + use psb_util_mod #ifdef PSB_HAVE_CUDA use psb_cuda_mod #endif @@ -44,9 +45,12 @@ program psb_comm_cg_test character(len=20) :: prec_name(n_precs) character(len=5) :: afmt character(len=256) :: arg + character(len=256) :: matrix_file + character(len=2) :: matrix_fmt character(len=16) :: gpu_arg logical :: setup_done logical :: use_gpu + logical :: use_external_matrix info = psb_success_ afmt = 'CSR' @@ -58,6 +62,9 @@ program psb_comm_cg_test itrace = -1 istop = 2 eps = 1.d-6 + matrix_file = '' + matrix_fmt = 'MM' + use_external_matrix = .false. #ifdef PSB_HAVE_CUDA use_gpu = .true. #else @@ -111,6 +118,12 @@ program psb_comm_cg_test write(psb_err_unit,'("Invalid value for --gpu option. Use --gpu=TRUE or --gpu=FALSE")') stop 1 end if + call parse_matrix_arg(matrix_file, matrix_fmt, info) + if (info /= psb_success_) then + write(psb_err_unit,'("Invalid matrix options. Use --matrix= [--fmt=MM|HB]")') + stop 1 + end if + use_external_matrix = (len_trim(matrix_file) > 0) ! call psb_set_debug_level(psb_debug_ext_) @@ -142,7 +155,12 @@ program psb_comm_cg_test if (my_rank == psb_root_) then write(psb_out_unit,*) 'Welcome to PSBLAS version: ', psb_version_string_ write(psb_out_unit,*) 'This is the comm/cg test program' - write(psb_out_unit,'("Grid dimensions : ",i4," x ",i4," x ",i4)') idim,idim,idim + if (use_external_matrix) then + write(psb_out_unit,'("Input matrix : ",a)') trim(matrix_file) + write(psb_out_unit,'("Input format : ",a)') trim(matrix_fmt) + else + write(psb_out_unit,'("Grid dimensions : ",i4," x ",i4," x ",i4)') idim,idim,idim + end if write(psb_out_unit,'("Number of processors : ",i0)') np write(psb_out_unit,'("Iterative method : CG")') write(psb_out_unit,'("Preconditioners : NONE, DIAG")') @@ -151,14 +169,19 @@ program psb_comm_cg_test write(psb_out_unit,'("Warmup solves : ",i0)') nwarm write(psb_out_unit,'("GPU enabled : ",l1)') use_gpu write(psb_out_unit,'(" ")') - write(psb_out_unit,'("Usage: ./psb_comm_cg_test [idim] [nrep] [nwarm] [itmax] [--gpu=TRUE|FALSE]")') + write(psb_out_unit,'("Usage: ./psb_comm_cg_test [idim] [nrep] [nwarm] [itmax] ",& + &"[--gpu=TRUE|FALSE] [--matrix=] [--fmt=MM|HB]")') write(psb_out_unit,'(" ")') end if call psb_barrier(ctxt) - ! call probe_ieee('before psb_d_gen_pde3d') - call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info) - ! call probe_ieee('after psb_d_gen_pde3d') + if (use_external_matrix) then + call load_external_matrix(ctxt, matrix_file, matrix_fmt, a, b, x, desc_a, afmt, info) + else + ! call probe_ieee('before psb_d_gen_pde3d') + call psb_d_gen_pde3d(ctxt,idim,a,b,x,desc_a,afmt,info) + ! call probe_ieee('after psb_d_gen_pde3d') + end if if (info /= psb_success_) goto 9999 #ifdef PSB_HAVE_CUDA @@ -168,9 +191,9 @@ program psb_comm_cg_test if (info /= psb_success_) goto 9999 call desc_a%cnv(mold=imold) if (info /= psb_success_) goto 9999 - call psb_geasb(x,desc_a,info,mold=vmold) + call x%cnv(mold=vmold) if (info /= psb_success_) goto 9999 - call psb_geasb(b,desc_a,info,mold=vmold) + call b%cnv(mold=vmold) if (info /= psb_success_) goto 9999 end if #endif @@ -514,6 +537,113 @@ contains end do end subroutine parse_gpu_arg + subroutine parse_matrix_arg(matrix_file, matrix_fmt, info) + character(len=*), intent(inout) :: matrix_file + character(len=*), intent(inout) :: matrix_fmt + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, argc + character(len=256) :: carg, uarg, val + + info = psb_success_ + argc = command_argument_count() + do i = 1, argc + call get_command_argument(i,carg) + uarg = psb_toupper(trim(carg)) + + if (index(uarg,'--MATRIX=') == 1) then + matrix_file = adjustl(carg(10:len_trim(carg))) + else if (trim(uarg) == '--MATRIX') then + if (i < argc) then + call get_command_argument(i+1,matrix_file) + else + info = psb_err_internal_error_ + return + end if + else if (index(uarg,'--FMT=') == 1) then + val = psb_toupper(adjustl(carg(7:len_trim(carg)))) + if ((trim(val) == 'MM') .or. (trim(val) == 'HB')) then + matrix_fmt = trim(val) + else + info = psb_err_internal_error_ + return + end if + else if (trim(uarg) == '--FMT') then + if (i < argc) then + call get_command_argument(i+1,val) + val = psb_toupper(trim(val)) + if ((trim(val) == 'MM') .or. (trim(val) == 'HB')) then + matrix_fmt = trim(val) + else + info = psb_err_internal_error_ + return + end if + else + info = psb_err_internal_error_ + return + end if + end if + end do + end subroutine parse_matrix_arg + + subroutine load_external_matrix(ctxt, matrix_file, matrix_fmt, a, bv, xv, desc_a, afmt, info) + type(psb_ctxt_type), intent(in) :: ctxt + character(len=*), intent(in) :: matrix_file + character(len=*), intent(in) :: matrix_fmt + type(psb_dspmat_type), intent(out) :: a + type(psb_d_vect_type), intent(out) :: bv, xv + type(psb_desc_type), intent(out) :: desc_a + character(len=*), intent(in) :: afmt + integer(psb_ipk_), intent(out) :: info + + type(psb_ldspmat_type) :: aux_a + real(psb_dpk_), allocatable :: rhs_glob(:), x_glob(:) + integer(psb_lpk_) :: nrows, ncols + + info = psb_success_ + + select case(psb_toupper(trim(matrix_fmt))) + case('MM') + call mm_mat_read(aux_a,info,filename=trim(matrix_file)) + case('HB') + call hb_read(aux_a,info,filename=trim(matrix_file)) + case default + info = psb_err_internal_error_ + return + end select + if (info /= psb_success_) return + + nrows = aux_a%get_nrows() + ncols = aux_a%get_ncols() + if (nrows /= ncols) then + write(psb_err_unit,'("Input matrix must be square for CG: ",a)') trim(matrix_file) + info = psb_err_internal_error_ + return + end if + + call psb_matdist(aux_a, a, ctxt, desc_a, info, fmt=afmt, parts=part_block) + if (info /= psb_success_) return + + call psb_geall(xv,desc_a,info) + if (info /= psb_success_) return + call psb_geall(bv,desc_a,info) + if (info /= psb_success_) return + + allocate(rhs_glob(nrows), x_glob(ncols), stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + return + end if + rhs_glob = done + x_glob = dzero + + call psb_scatter(rhs_glob,bv,desc_a,info,root=psb_root_) + if (info /= psb_success_) return + call psb_scatter(x_glob,xv,desc_a,info,root=psb_root_) + if (info /= psb_success_) return + + deallocate(rhs_glob, x_glob) + end subroutine load_external_matrix + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) implicit none integer(psb_ipk_), intent(in) :: idim diff --git a/test/comm/spmv/README.md b/test/comm/spmv/README.md index 4e98e7a3..d6800c11 100644 --- a/test/comm/spmv/README.md +++ b/test/comm/spmv/README.md @@ -19,3 +19,13 @@ Communication schemes compared: Unlike `swapdata/`, which checks direct halo exchange, this test covers the overlapped SpMV workflow. + +Run options +----------- + +- Default PDE-generated matrix: `./runs/psb_spmv_kernel [--gpu=TRUE|FALSE] [--nooverlap]` +- External matrix: `./runs/psb_spmv_kernel [--gpu=TRUE|FALSE] --matrix= [--fmt=MM|HB] [--nooverlap]` + +The overlap path is enabled by default; pass `--nooverlap` to force the non-overlapped halo-update path. + +When `--matrix` is provided, the benchmark reads and distributes that matrix instead of generating the 3D PDE test matrix. diff --git a/test/comm/spmv/psb_spmv_test.f90 b/test/comm/spmv/psb_spmv_test.f90 index 897f9874..a6964956 100644 --- a/test/comm/spmv/psb_spmv_test.f90 +++ b/test/comm/spmv/psb_spmv_test.f90 @@ -1,8 +1,5 @@ !> Test program for overlapping communication and computation with psb_spmm. !! -!! This benchmark compares two equivalent SpMV paths: -!! 1. Serialized halo exchange + compute -!! 2. Overlapped psb_spmm(..., doswap=.true.) !! module psb_spmv_overlap_test @@ -532,7 +529,7 @@ contains return end subroutine psb_d_gen_pde3d - subroutine run_spmv_kernel(ctxt,use_gpu) + subroutine run_spmv_kernel(ctxt,use_gpu,matrix_file,matrix_fmt,cpu_fmt,gpu_fmt,idim_in,times_in,do_swap) use psb_base_mod #ifdef PSB_HAVE_CUDA use psb_cuda_mod @@ -541,73 +538,112 @@ contains type(psb_ctxt_type), intent(in) :: ctxt logical, intent(in) :: use_gpu + character(len=*), intent(in) :: matrix_file + character(len=*), intent(in) :: matrix_fmt + character(len=*), intent(in) :: cpu_fmt + character(len=*), intent(in) :: gpu_fmt + integer(psb_ipk_), intent(in) :: idim_in, times_in + logical, intent(in) :: do_swap type(psb_dspmat_type) :: a type(psb_d_vect_type) :: x, y type(psb_desc_type) :: desc_a - character(len=5) :: afmt + character(len=8) :: afmt character(len=64) :: env_buf integer(psb_ipk_) :: my_rank, np, info, err_act integer(psb_ipk_) :: idim, times, i, n_global integer :: env_len, env_status, ios real(psb_dpk_) :: alpha, beta, t0, t1, dt, avg_t + logical :: use_external_matrix #ifdef PSB_HAVE_CUDA - type(psb_d_vect_cuda) :: vmold - type(psb_i_vect_cuda) :: imold - type(psb_d_cuda_hlg_sparse_mat), target :: ahlg - class(psb_d_base_sparse_mat), pointer :: agmold + type(psb_d_vect_cuda) :: cuda_vector_mold + type(psb_i_vect_cuda) :: cuda_index_mold + type(psb_d_cuda_elg_sparse_mat), target :: cuda_ell_sparse_mold + type(psb_d_cuda_csrg_sparse_mat), target :: cuda_csr_sparse_mold + type(psb_d_cuda_hdiag_sparse_mat), target :: cuda_hdia_sparse_mold + type(psb_d_cuda_hlg_sparse_mat), target :: cuda_hll_sparse_mold + class(psb_d_base_sparse_mat), pointer :: cuda_sparse_mold #endif info = psb_success_ - afmt = 'CSR' - idim = 10 - times = 100 + afmt = psb_toupper(trim(cpu_fmt)) + if (len_trim(afmt) == 0) afmt = 'CSR' + if (idim_in > 0) then + idim = idim_in + else + idim = 10 + end if + + if (times_in > 0) then + times = times_in + else + times = 100 + end if alpha = done beta = dzero call psb_erractionsave(err_act) call psb_info(ctxt, my_rank, np) + use_external_matrix = (len_trim(matrix_file) > 0) - call get_environment_variable('IDIM', env_buf, length=env_len, status=env_status) - if ((env_status == 0) .and. (env_len > 0)) then - read(env_buf(1:env_len), *, iostat=ios) idim - if ((ios /= 0) .or. (idim < 2)) idim = 10 - end if - call get_environment_variable('TIMES', env_buf, length=env_len, status=env_status) - if ((env_status == 0) .and. (env_len > 0)) then - read(env_buf(1:env_len), *, iostat=ios) times - if ((ios /= 0) .or. (times < 1)) times = 100 + if (idim_in <= 0) then + call get_environment_variable('IDIM', env_buf, length=env_len, status=env_status) + if ((env_status == 0) .and. (env_len > 0)) then + read(env_buf(1:env_len), *, iostat=ios) idim + if ((ios /= 0) .or. (idim < 2)) idim = 10 + end if end if - n_global = idim * idim * idim + if (times_in <= 0) then + call get_environment_variable('TIMES', env_buf, length=env_len, status=env_status) + if ((env_status == 0) .and. (env_len > 0)) then + read(env_buf(1:env_len), *, iostat=ios) times + if ((ios /= 0) .or. (times < 1)) times = 100 + end if + end if call psb_barrier(ctxt) - call psb_d_gen_pde3d(ctxt,idim,a,y,x,desc_a,afmt,info) + if (use_external_matrix) then + call load_external_matrix(ctxt, matrix_file, matrix_fmt, a, y, x, desc_a, afmt, info) + n_global = int(a%get_nrows(),kind=psb_ipk_) + else + call psb_d_gen_pde3d(ctxt,idim,a,y,x,desc_a,afmt,info) + n_global = idim * idim * idim + end if if (info /= psb_success_) goto 9999 #ifdef PSB_HAVE_CUDA if (use_gpu) then - agmold => ahlg - call a%cscnv(info,mold=agmold) + select case(psb_toupper(trim(gpu_fmt))) + case('ELG') + cuda_sparse_mold => cuda_ell_sparse_mold + case('CSRG') + cuda_sparse_mold => cuda_csr_sparse_mold + case('HDIAG','HDIA') + cuda_sparse_mold => cuda_hdia_sparse_mold + case default + cuda_sparse_mold => cuda_hll_sparse_mold + end select + call a%cscnv(info,mold=cuda_sparse_mold) if (info /= psb_success_) goto 9999 - call desc_a%cnv(mold=imold) + call desc_a%cnv(mold=cuda_index_mold) if (info /= psb_success_) goto 9999 - call psb_geasb(x,desc_a,info,mold=vmold) + call x%cnv(mold=cuda_vector_mold) if (info /= psb_success_) goto 9999 - call psb_geasb(y,desc_a,info,mold=vmold) + call y%cnv(mold=cuda_vector_mold) if (info /= psb_success_) goto 9999 end if #endif ! warm-up - call psb_spmm(alpha, a, x, beta, y, desc_a, info, doswap=.false.) + call psb_spmm(alpha, a, x, beta, y, desc_a, info, doswap=do_swap) if (info /= psb_success_) goto 9999 call psb_barrier(ctxt) t0 = psb_wtime() do i = 1, times - call psb_spmm(alpha, a, x, beta, y, desc_a, info, doswap=.false.) + call psb_spmm(alpha, a, x, beta, y, desc_a, info, doswap=do_swap) if (info /= psb_success_) exit end do t1 = psb_wtime() @@ -618,8 +654,19 @@ contains avg_t = dt / real(times, psb_dpk_) if (my_rank == psb_root_) then - write(psb_out_unit,'(/,"SpMV benchmark (no overlap)")') - write(psb_out_unit,'(" idim : ",i0)') idim + if (do_swap) then + write(psb_out_unit,'(/,"SpMV benchmark (overlap)")') + else + write(psb_out_unit,'(/,"SpMV benchmark (no overlap)")') + end if + write(psb_out_unit,'(" cpu matrix fmt : ",a)') trim(afmt) + if (use_gpu) write(psb_out_unit,'(" gpu matrix fmt : ",a)') trim(psb_toupper(trim(gpu_fmt))) + if (use_external_matrix) then + write(psb_out_unit,'(" matrix file : ",a)') trim(matrix_file) + write(psb_out_unit,'(" matrix format : ",a)') trim(matrix_fmt) + else + write(psb_out_unit,'(" idim : ",i0)') idim + end if write(psb_out_unit,'(" global unknowns : ",i0)') n_global write(psb_out_unit,'(" repetitions : ",i0)') times write(psb_out_unit,'(" total time [s] : ",es12.5)') dt @@ -637,6 +684,65 @@ contains call psb_error_handler(ctxt, err_act) end subroutine run_spmv_kernel + subroutine load_external_matrix(ctxt, matrix_file, matrix_fmt, a, bv, xv, desc_a, afmt, info) + type(psb_ctxt_type), intent(in) :: ctxt + character(len=*), intent(in) :: matrix_file + character(len=*), intent(in) :: matrix_fmt + type(psb_dspmat_type), intent(out) :: a + type(psb_d_vect_type), intent(out) :: bv, xv + type(psb_desc_type), intent(out) :: desc_a + character(len=*), intent(in) :: afmt + integer(psb_ipk_), intent(out) :: info + + type(psb_ldspmat_type) :: aux_a + real(psb_dpk_), allocatable :: rhs_glob(:), x_glob(:) + integer(psb_lpk_) :: nrows, ncols + + info = psb_success_ + + select case(psb_toupper(trim(matrix_fmt))) + case('MM') + call mm_mat_read(aux_a,info,filename=trim(matrix_file)) + case('HB') + call hb_read(aux_a,info,filename=trim(matrix_file)) + case default + info = psb_err_internal_error_ + return + end select + if (info /= psb_success_) return + + nrows = aux_a%get_nrows() + ncols = aux_a%get_ncols() + if (nrows /= ncols) then + write(psb_err_unit,'("Input matrix must be square: ",a)') trim(matrix_file) + info = psb_err_internal_error_ + return + end if + + call psb_matdist(aux_a, a, ctxt, desc_a, info, fmt=afmt, parts=part_block) + if (info /= psb_success_) return + + call psb_geall(xv,desc_a,info) + if (info /= psb_success_) return + call psb_geall(bv,desc_a,info) + if (info /= psb_success_) return + + allocate(rhs_glob(nrows), x_glob(ncols), stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + return + end if + rhs_glob = done + x_glob = dzero + + call psb_scatter(rhs_glob,bv,desc_a,info,root=psb_root_) + if (info /= psb_success_) return + call psb_scatter(x_glob,xv,desc_a,info,root=psb_root_) + if (info /= psb_success_) return + + deallocate(rhs_glob, x_glob) + end subroutine load_external_matrix + end module psb_spmv_overlap_test program psb_spmv_kernel @@ -650,7 +756,22 @@ program psb_spmv_kernel type(psb_ctxt_type) :: ctxt logical :: use_gpu integer(psb_ipk_) :: my_rank, np, k + integer :: ios character(len=256) :: arg + character(len=256) :: matrix_file + character(len=2) :: matrix_fmt + character(len=8) :: cpu_fmt + character(len=8) :: gpu_fmt + integer(psb_ipk_) :: idim_arg, times_arg + logical :: do_swap + idim_arg = -1 + times_arg = -1 + + matrix_file = '' + matrix_fmt = 'MM' + cpu_fmt = 'CSR' + gpu_fmt = 'HLG' + do_swap = .true. call psb_init(ctxt) call psb_info(ctxt, my_rank, np) @@ -670,6 +791,71 @@ program psb_spmv_kernel case ('FALSE','F','0','NO','N','OFF') use_gpu = .false. end select + else if (index(psb_toupper(trim(arg)), '--MATRIX=') == 1) then + matrix_file = adjustl(arg(10:len_trim(arg))) + else if (index(psb_toupper(trim(arg)), '--FMT=') == 1) then + arg = psb_toupper(adjustl(arg(7:len_trim(arg)))) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + else if (index(psb_toupper(trim(arg)), '--MTX_FMT=') == 1) then + arg = psb_toupper(adjustl(arg(10:len_trim(arg)))) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + else if (index(psb_toupper(trim(arg)), '--DIM=') == 1) then + read(arg(7:len_trim(arg)),*,iostat=ios) idim_arg + if ((ios /= 0) .or. (idim_arg < 2)) idim_arg = -1 + else if (index(psb_toupper(trim(arg)), '--TIMES=') == 1) then + read(arg(9:len_trim(arg)),*,iostat=ios) times_arg + if ((ios /= 0) .or. (times_arg < 1)) times_arg = -1 + else if (index(psb_toupper(trim(arg)), '--ITERS=') == 1) then + read(arg(9:len_trim(arg)),*,iostat=ios) times_arg + if ((ios /= 0) .or. (times_arg < 1)) times_arg = -1 + else if (index(psb_toupper(trim(arg)), '--CPU_FORMAT=') == 1) then + cpu_fmt = psb_toupper(adjustl(arg(14:len_trim(arg)))) + else if (index(psb_toupper(trim(arg)), '--CPU_FMT=') == 1) then + cpu_fmt = psb_toupper(adjustl(arg(11:len_trim(arg)))) + else if (index(psb_toupper(trim(arg)), '--GPU_FORMAT=') == 1) then + gpu_fmt = psb_toupper(adjustl(arg(14:len_trim(arg)))) + else if (index(psb_toupper(trim(arg)), '--GPU_FMT=') == 1) then + gpu_fmt = psb_toupper(adjustl(arg(11:len_trim(arg)))) + else if ((trim(psb_toupper(arg)) == '--NOOVERLAP') .or. (trim(psb_toupper(arg)) == '--NO_OVERLAP')) then + do_swap = .false. + else if ((trim(psb_toupper(arg)) == '--OVERLAP') .or. (trim(psb_toupper(arg)) == '--SWAP')) then + do_swap = .true. + else if (trim(psb_toupper(arg)) == '--MATRIX') then + if (k < command_argument_count()) call get_command_argument(k+1,matrix_file) + else if (trim(psb_toupper(arg)) == '--FMT') then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + arg = psb_toupper(trim(arg)) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + end if + else if (trim(psb_toupper(arg)) == '--MTX_FMT') then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + arg = psb_toupper(trim(arg)) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + end if + else if (trim(psb_toupper(arg)) == '--DIM') then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + read(arg,*,iostat=ios) idim_arg + if ((ios /= 0) .or. (idim_arg < 2)) idim_arg = -1 + end if + else if ((trim(psb_toupper(arg)) == '--TIMES') .or. (trim(psb_toupper(arg)) == '--ITERS')) then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + read(arg,*,iostat=ios) times_arg + if ((ios /= 0) .or. (times_arg < 1)) times_arg = -1 + end if + else if ((trim(psb_toupper(arg)) == '--CPU_FORMAT') .or. (trim(psb_toupper(arg)) == '--CPU_FMT')) then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + cpu_fmt = psb_toupper(trim(arg)) + end if + else if ((trim(psb_toupper(arg)) == '--GPU_FORMAT') .or. (trim(psb_toupper(arg)) == '--GPU_FMT')) then + if (k < command_argument_count()) then + call get_command_argument(k+1,arg) + gpu_fmt = psb_toupper(trim(arg)) + end if end if end do @@ -683,9 +869,12 @@ program psb_spmv_kernel write(psb_out_unit,*) 'Welcome to PSBLAS version: ', psb_version_string_ write(psb_out_unit,*) 'This is the psb_spmv_kernel sample program' write(psb_out_unit,'("GPU enabled : ",l1)') use_gpu + write(psb_out_unit,'("Usage: ./psb_spmv_kernel [--gpu=TRUE|FALSE] [--dim=N] [--times=N] ",& + &"[--cpu_fmt=CSR|COO|CSC|ELL|HLL] [--gpu_fmt=HLL|ELL|CSR|HDIA] [--matrix=] [--fmt=MM|HB] ",& + &"[--overlap|--nooverlap]")') end if - call run_spmv_kernel(ctxt,use_gpu) + call run_spmv_kernel(ctxt,use_gpu,matrix_file,matrix_fmt,cpu_fmt,gpu_fmt,idim_arg,times_arg,do_swap) #ifdef PSB_HAVE_CUDA if (use_gpu) call psb_cuda_exit() diff --git a/test/comm/swapdata/README.md b/test/comm/swapdata/README.md index 812105bc..a7811f62 100644 --- a/test/comm/swapdata/README.md +++ b/test/comm/swapdata/README.md @@ -17,3 +17,13 @@ Communication patterns exercised: This test validates the low-level communication behavior in isolation, without the full SpMV overlap pipeline. + +Run options +----------- + +- Synthetic 3D stencil descriptor (default): + - `./runs/psb_comm_test [--dim N] [--iters N] [--mode both|baseline|neighbor|persistent]` +- External matrix descriptor: + - `./runs/psb_comm_test [--iters N] [--mode both|baseline|neighbor|persistent] --matrix [--fmt MM|HB]` + +With `--matrix`, the descriptor is built from the distributed input matrix pattern. diff --git a/test/comm/swapdata/psb_comm_test.F90 b/test/comm/swapdata/psb_comm_test.F90 index c7bb08c5..40e83734 100644 --- a/test/comm/swapdata/psb_comm_test.F90 +++ b/test/comm/swapdata/psb_comm_test.F90 @@ -17,6 +17,7 @@ ! program psb_comm_test use psb_base_mod + use psb_util_mod use psb_error_mod, only: psb_set_debug_level, psb_debug_ext_ use psi_mod use psb_comm_factory_mod, only: psb_comm_set, psb_comm_free @@ -29,9 +30,12 @@ program psb_comm_test integer(psb_ipk_) :: idim integer(psb_ipk_) :: argc integer(psb_ipk_) :: iters - character(len=32) :: arg + character(len=256) :: arg character(len=16) :: mode + character(len=256) :: matrix_file + character(len=2) :: matrix_fmt logical :: debug_swapdata + logical :: use_external_matrix ! ---- descriptor / context ---- type(psb_ctxt_type) :: ctxt @@ -39,6 +43,8 @@ program psb_comm_test integer(psb_ipk_) :: my_rank, np, info, i, nr, number_of_local_rows integer(psb_lpk_) :: m, nt integer(psb_lpk_), allocatable :: myidx(:) + type(psb_dspmat_type) :: a_mat + type(psb_ldspmat_type) :: aux_a ! ---- vectors ---- type(psb_d_vect_type) :: v_baseline, v_neighbor, v_neighbor_persistent @@ -55,6 +61,7 @@ program psb_comm_test ! ---- error / reporting ---- integer(psb_ipk_) :: n_pass, n_total, imode logical :: run_baseline, run_neighbor, run_persistent + logical :: mat_allocated logical :: comm_ok real(psb_dpk_) :: err, tol real(psb_dpk_) :: t0, t1, dt, tsum_baseline, tsum_neighbor, tsum_neighbor_persistent @@ -70,6 +77,10 @@ program psb_comm_test iters = 5 mode = 'both' debug_swapdata = .false. + matrix_file = '' + matrix_fmt = 'MM' + use_external_matrix = .false. + mat_allocated = .false. ! ---- parse command-line argument for idim ---- idim = 10 @@ -86,8 +97,24 @@ program psb_comm_test call get_command_argument(i+1, arg) read(arg, *) iters end if + else if (index(psb_toupper(trim(arg)),'--MATRIX=') == 1) then + matrix_file = adjustl(arg(10:len_trim(arg))) + else if (trim(psb_toupper(arg)) == '--MATRIX') then + if (i < argc) then + call get_command_argument(i+1, matrix_file) + end if + else if (index(psb_toupper(trim(arg)),'--FMT=') == 1) then + arg = psb_toupper(adjustl(arg(7:len_trim(arg)))) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + else if (trim(psb_toupper(arg)) == '--FMT') then + if (i < argc) then + call get_command_argument(i+1, arg) + arg = psb_toupper(trim(arg)) + if ((trim(arg) == 'MM') .or. (trim(arg) == 'HB')) matrix_fmt = trim(arg) + end if end if end do + use_external_matrix = (len_trim(matrix_file) > 0) ! parse optional mode flag do i = 1, argc @@ -126,7 +153,7 @@ program psb_comm_test run_persistent = .true. end select - if (idim <= 0) then + if ((.not.use_external_matrix) .and. (idim <= 0)) then write(*,*) 'Invalid dimension specified. Usage: --dim ' call psb_abort(ctxt) end if @@ -140,48 +167,83 @@ program psb_comm_test write(psb_out_unit,'("================================================")') write(psb_out_unit,'(" Test: D-type halo baseline vs neighbor topo")') write(psb_out_unit,'(" Processes : ",i0)') np - write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim + if (use_external_matrix) then + write(psb_out_unit,'(" Matrix : ",a)') trim(matrix_file) + write(psb_out_unit,'(" Format : ",a)') trim(matrix_fmt) + else + write(psb_out_unit,'(" Grid : ",i0," x ",i0," x ",i0)') idim,idim,idim + end if + write(psb_out_unit,'(" Usage : ./psb_comm_test [--dim N] [--iters N] [--mode ...] ",& + &"[--matrix ] [--fmt MM|HB]")') write(psb_out_unit,'("================================================")') end if ! ================================================================== ! 2. Build descriptor with 7-point stencil connectivity ! ================================================================== - m = (1_psb_lpk_ * idim) * idim * idim - nt = (m + np - 1) / np - nr = max(0, min(int(nt,psb_ipk_), int(m - (my_rank * nt),psb_ipk_))) - - call psb_cdall(ctxt, desc_a, info, nl=nr) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'cdall error:', info - call psb_abort(ctxt) - end if + if (use_external_matrix) then + select case(psb_toupper(trim(matrix_fmt))) + case('MM') + call mm_mat_read(aux_a,info,filename=trim(matrix_file)) + case('HB') + call hb_read(aux_a,info,filename=trim(matrix_file)) + case default + info = psb_err_internal_error_ + end select + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'matrix read error:', info + call psb_abort(ctxt) + end if + if (aux_a%get_nrows() /= aux_a%get_ncols()) then + write(psb_err_unit,*) my_rank, 'matrix must be square for this test' + call psb_abort(ctxt) + end if + m = aux_a%get_nrows() + call psb_matdist(aux_a, a_mat, ctxt, desc_a, info, fmt='CSR', parts=part_block) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'matdist error:', info + call psb_abort(ctxt) + end if + mat_allocated = .true. + myidx = desc_a%get_global_indices() + number_of_local_rows = size(myidx) + else + m = (1_psb_lpk_ * idim) * idim * idim + nt = (m + np - 1) / np + nr = max(0, min(int(nt,psb_ipk_), int(m - (my_rank * nt),psb_ipk_))) - myidx = desc_a%get_global_indices() - number_of_local_rows = size(myidx) + call psb_cdall(ctxt, desc_a, info, nl=nr) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'cdall error:', info + call psb_abort(ctxt) + end if - do i = 1, number_of_local_rows - call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info) - if (myidx(i) > 1) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info) - if (myidx(i) < m) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info) - if (myidx(i) > idim) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info) - if (myidx(i) + idim <= m) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info) - if (myidx(i) > int(idim,psb_lpk_)*idim) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), & - & (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info) - if (myidx(i) + int(idim,psb_lpk_)*idim <= m) & - & call psb_cdins(1_psb_ipk_, (/myidx(i)/), & - & (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info) - end do + myidx = desc_a%get_global_indices() + number_of_local_rows = size(myidx) + + do i = 1, number_of_local_rows + call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)/), desc_a, info) + if (myidx(i) > 1) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-1/), desc_a, info) + if (myidx(i) < m) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+1/), desc_a, info) + if (myidx(i) > idim) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)-idim/), desc_a, info) + if (myidx(i) + idim <= m) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), (/myidx(i)+idim/), desc_a, info) + if (myidx(i) > int(idim,psb_lpk_)*idim) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), & + & (/myidx(i) - int(idim,psb_lpk_)*idim/), desc_a, info) + if (myidx(i) + int(idim,psb_lpk_)*idim <= m) & + & call psb_cdins(1_psb_ipk_, (/myidx(i)/), & + & (/myidx(i) + int(idim,psb_lpk_)*idim/), desc_a, info) + end do - call psb_cdasb(desc_a, info) - if (info /= psb_success_) then - write(psb_err_unit,*) my_rank, 'cdasb error:', info - call psb_abort(ctxt) + call psb_cdasb(desc_a, info) + if (info /= psb_success_) then + write(psb_err_unit,*) my_rank, 'cdasb error:', info + call psb_abort(ctxt) + end if end if nrow = desc_a%get_local_rows() ! owned @@ -574,6 +636,7 @@ program psb_comm_test 9999 call psb_gefree(v_baseline, desc_a, info) call psb_gefree(v_neighbor, desc_a, info) call psb_gefree(v_neighbor_persistent, desc_a, info) + if (mat_allocated) call psb_spfree(a_mat, desc_a, info) call psb_cdfree(desc_a, info) call psb_exit(ctxt) diff --git a/test/pdegen/psb_d_pde3d.F90 b/test/pdegen/psb_d_pde3d.F90 index 4b98cafe..27a5e53e 100644 --- a/test/pdegen/psb_d_pde3d.F90 +++ b/test/pdegen/psb_d_pde3d.F90 @@ -837,14 +837,6 @@ program psb_d_pde3d & err=err,itrace=itrace,& & istop=istopc) case('BICGSTAB','BICGSTABL','BICG','CG','CGS','FCG','GCR','RGMRES') - call psb_comm_set(psb_comm_persistent_ineighbor_alltoallv_,xxv%v%comm_handle,info) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='comm init' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - call psb_krylov(kmethd,a,prec,bv,xxv,eps,& & desc_a,info,itmax=itmax,iter=iter,err=err,itrace=itrace,& & istop=istopc,irst=irst)