[FIX] Modified spmv test to check overlap

communication_v2
Stack-1 3 days ago
parent fb5ba59693
commit d231efe35a

@ -176,7 +176,11 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1) if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_comm_status_start_, dzero, x%v, desc_a, info, data=psb_comm_halo_) if (doswap_) then
call psi_swapdata(psb_comm_status_start_, dzero, x%v, desc_a, info, data=psb_comm_halo_)
else
call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase1) if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2) if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info) call a%ad%spmm(alpha,x%v,beta,y%v,info)
@ -195,9 +199,7 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (do_timings) call psb_barrier(ctxt) if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11) if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_)
call psi_swapdata(psb_comm_status_sync_, dzero, x%v, desc_a, info, data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11) if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12) if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info) call psb_csmm(alpha,a,x,beta,y,info)

@ -184,44 +184,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, dupl, bld_and)
end if end if
if (bld_and_) then if (bld_and_) then
!!$ allocate(a%ad,mold=a%a) call a%split_nd(n_row,n_col,info)
!!$ allocate(a%and,mold=a%a)o
!!$ call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_d_coo_sparse_mat) :: acoo
!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else else
if (allocated(a%ad)) deallocate(a%ad) if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and) if (allocated(a%and)) deallocate(a%and)

@ -554,7 +554,7 @@ contains
character(len=8) :: afmt character(len=8) :: afmt
character(len=64) :: env_buf character(len=64) :: env_buf
integer(psb_ipk_) :: my_rank, np, info, err_act integer(psb_ipk_) :: my_rank, np, info, err_act
integer(psb_ipk_) :: idim, times, i, n_global integer(psb_ipk_) :: idim, times, i
integer :: env_len, env_status, ios integer :: env_len, env_status, ios
real(psb_dpk_) :: alpha, beta, t0, t1, dt, avg_t real(psb_dpk_) :: alpha, beta, t0, t1, dt, avg_t
logical :: use_external_matrix logical :: use_external_matrix
@ -629,10 +629,8 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
if (use_external_matrix) then if (use_external_matrix) then
call load_external_matrix(ctxt, matrix_file, matrix_fmt, a, y, x, desc_a, afmt, info) call load_external_matrix(ctxt, matrix_file, matrix_fmt, a, y, x, desc_a, afmt, info)
n_global = int(a%get_nrows(),kind=psb_ipk_)
else else
call psb_d_gen_pde3d(ctxt,idim,a,y,x,desc_a,afmt,info) call psb_d_gen_pde3d(ctxt,idim,a,y,x,desc_a,afmt,info)
n_global = idim * idim * idim
end if end if
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
@ -693,7 +691,9 @@ contains
else else
write(psb_out_unit,'(" idim : ",i0)') idim write(psb_out_unit,'(" idim : ",i0)') idim
end if end if
write(psb_out_unit,'(" global unknowns : ",i0)') n_global write(psb_out_unit,'(" global non zeros : ",i0)') a%get_nzeros()
write(psb_out_unit,'(" global rows : ",i0)') a%get_nrows()
write(psb_out_unit,'(" global cols : ",i0)') a%get_ncols()
write(psb_out_unit,'(" repetitions : ",i0)') times write(psb_out_unit,'(" repetitions : ",i0)') times
write(psb_out_unit,'(" comm backend : ",a)') trim(psb_toupper(trim(comm_mode))) write(psb_out_unit,'(" comm backend : ",a)') trim(psb_toupper(trim(comm_mode)))
write(psb_out_unit,'(" total time [s] : ",es12.5)') dt write(psb_out_unit,'(" total time [s] : ",es12.5)') dt
@ -790,7 +790,7 @@ program psb_spmv_kernel
character(len=8) :: cpu_fmt character(len=8) :: cpu_fmt
character(len=8) :: gpu_fmt character(len=8) :: gpu_fmt
integer(psb_ipk_) :: idim_arg, times_arg integer(psb_ipk_) :: idim_arg, times_arg
logical :: do_swap logical :: do_overlap
integer :: kmode integer :: kmode
integer, parameter :: n_comm_modes = 5 integer, parameter :: n_comm_modes = 5
character(len=20), parameter :: comm_modes(n_comm_modes) = [character(len=20) :: & character(len=20), parameter :: comm_modes(n_comm_modes) = [character(len=20) :: &
@ -803,7 +803,7 @@ program psb_spmv_kernel
matrix_fmt = 'MM' matrix_fmt = 'MM'
cpu_fmt = 'CSR' cpu_fmt = 'CSR'
gpu_fmt = 'HLG' gpu_fmt = 'HLG'
do_swap = .true. do_overlap = .true.
call psb_init(ctxt) call psb_init(ctxt)
call psb_info(ctxt, my_rank, np) call psb_info(ctxt, my_rank, np)
@ -849,9 +849,9 @@ program psb_spmv_kernel
else if (index(psb_toupper(trim(arg)), '--GPU_FMT=') == 1) then else if (index(psb_toupper(trim(arg)), '--GPU_FMT=') == 1) then
gpu_fmt = psb_toupper(adjustl(arg(11:len_trim(arg)))) gpu_fmt = psb_toupper(adjustl(arg(11:len_trim(arg))))
else if ((trim(psb_toupper(arg)) == '--NOOVERLAP') .or. (trim(psb_toupper(arg)) == '--NO_OVERLAP')) then else if ((trim(psb_toupper(arg)) == '--NOOVERLAP') .or. (trim(psb_toupper(arg)) == '--NO_OVERLAP')) then
do_swap = .false. do_overlap = .false.
else if ((trim(psb_toupper(arg)) == '--OVERLAP') .or. (trim(psb_toupper(arg)) == '--SWAP')) then else if ((trim(psb_toupper(arg)) == '--OVERLAP') .or. (trim(psb_toupper(arg)) == '--SWAP')) then
do_swap = .true. do_overlap = .true.
else if (trim(psb_toupper(arg)) == '--MATRIX') then else if (trim(psb_toupper(arg)) == '--MATRIX') then
if (k < command_argument_count()) call get_command_argument(k+1,matrix_file) if (k < command_argument_count()) call get_command_argument(k+1,matrix_file)
else if (trim(psb_toupper(arg)) == '--FMT') then else if (trim(psb_toupper(arg)) == '--FMT') then
@ -911,7 +911,7 @@ program psb_spmv_kernel
write(psb_out_unit,'(/,"=== Backend sweep: ",a," ===")') trim(comm_modes(kmode)) write(psb_out_unit,'(/,"=== Backend sweep: ",a," ===")') trim(comm_modes(kmode))
end if end if
call run_spmv_kernel(ctxt, use_gpu, matrix_file, matrix_fmt, cpu_fmt, gpu_fmt, & call run_spmv_kernel(ctxt, use_gpu, matrix_file, matrix_fmt, cpu_fmt, gpu_fmt, &
& idim_arg, times_arg, do_swap, comm_modes(kmode)) & idim_arg, times_arg, do_overlap, comm_modes(kmode))
end do end do
#ifdef PSB_HAVE_CUDA #ifdef PSB_HAVE_CUDA

Loading…
Cancel
Save