! psblas3/test/halo/psb_halo_df_test.f90

!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
program psb_df_sample
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_util_mod
use getp
use mpi
use iso_fortran_env, only : output_unit
implicit none
! input parameters
character(len=40) :: kmethd, ptype, mtrx_file, rhs_file,renum
! sparse matrices
type(psb_dspmat_type) :: a, aux_a
! preconditioner data
type(psb_dprec_type) :: prec
! dense matrices
real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
real(psb_dpk_), pointer :: b_col_glob(:)
type(psb_d_vect_type) :: b_col, x_col, r_col
! communications data structure
type(psb_desc_type):: desc_a
integer(psb_ipk_) :: ictxt, me, np
! solver parameters
integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode,&
& methd, istopc, irst
integer(psb_epk_) :: amatsize, precsize, descsize
real(psb_dpk_) :: err, eps, cond
character(len=5) :: afmt
character(len=20) :: name, part
character(len=2) :: filefmt
integer(psb_ipk_), parameter :: iunit=12
integer(psb_ipk_) :: iparm(20)
! other variables
integer(psb_ipk_) :: i,info,j,m_problem, err_act
integer(psb_ipk_) :: internal, m,ii,nnzero
real(psb_dpk_) :: t1, t2, tprec, t_low, t_high, ave_request_create_t, request_create_t
real(psb_dpk_) :: r_amax, b_amax, scale,resmx,resmxp
integer(psb_ipk_) :: nrhs, nrow, n_row, dim, ne, nv, ncol
integer(psb_ipk_), allocatable :: ivg(:), perm(:)
integer(psb_ipk_), allocatable :: ipv(:)
character(len=40) :: fname, fnout
! persistent communication
integer(psb_lpk_), allocatable :: iv(:)
real(psb_dpk_), allocatable :: xa(:)
integer(psb_ipk_) :: num_iterations, num_neighbors, &
min_neighbors, max_neighbors, &
sum_neighbors
integer(psb_ipk_) :: sum_snd, min_snd, max_snd, num_snd, tot_snd, sum_tot_snd
integer(psb_ipk_) :: sum_rcv, min_rcv, max_rcv, num_rcv, tot_rcv, sum_tot_rcv
real(psb_spk_) :: ave_neighbors, ave_snd_buf, ave_rcv_buf, ave_tot_snd, ave_tot_rcv
real(psb_dpk_) :: alltoall_comm_t, ave_alltoall_comm_t, total_time, &
ave_time_pp, ave_time_pi
real(psb_dpk_) :: median_comm_t, ave_alltoall_median_comm_t, min_comm_t, max_comm_t, &
ave_min_comm_t, ave_max_comm_t, sum_median_comm_t, sum_min_comm_t, sum_max_comm_t, &
ave_alltoall_max_comm_t, ave_alltoall_min_comm_t
real(psb_dpk_) :: halo_t_start, halo_t_end, ave_halo_t_pi, &
sum_halo_t, halo_time
real(psb_spk_) :: ave_snd, ave_rcv
integer(psb_ipk_) :: swap_mode
logical :: swap_persistent, swap_nonpersistent, file_exists
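!
! This test fills a vector with the global indices of its local and halo
! entries, repeatedly performs a halo exchange with the selected swap mode,
! checks that the received halo values match the expected global indices,
! and finally gathers timing and buffer statistics on the root process.
!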
call psb_init(ictxt)
call psb_info(ictxt,me,np)
if (me < 0) then
! This should not happen, but just in case
call psb_exit(ictxt)
stop
endif
name='psb_df_sample'
if(psb_errstatus_fatal()) goto 9999
info=psb_success_
call psb_set_errverbosity(itwo)
!
! Hello world
!
if (me == psb_root_) then
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
write(*,*) 'This is the ',trim(name),' sample program'
end if
!
! get parameters
!
call get_parms(ictxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,&
& part,afmt,istopc,itmax,itrace,irst,eps)
call psb_barrier(ictxt)
t1 = psb_wtime()
! read the input matrix to be processed and (possibly) the rhs
nrhs = 1
if (me == psb_root_) then
select case(psb_toupper(filefmt))
case('MM')
! For Matrix Market we have an input file for the matrix
! and an (optional) second file for the RHS.
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
if (info == psb_success_) then
if (rhs_file /= 'NONE') then
call mm_array_read(aux_b,info,iunit=iunit,filename=rhs_file)
end if
end if
case ('HB')
! For Harwell-Boeing we have a single file which may or may not
! contain an RHS.
call hb_read(aux_a,info,iunit=iunit,b=aux_b,filename=mtrx_file)
case default
info = -1
write(psb_err_unit,*) 'Wrong choice for file format ', filefmt
end select
if (info /= psb_success_) then
write(psb_err_unit,*) 'Error while reading input matrix '
call psb_abort(ictxt)
end if
m_problem = aux_a%get_nrows()
call psb_bcast(ictxt,m_problem)
call psb_mat_renum(psb_mat_renum_identity_,aux_a,info,perm)
! At this point aux_b may still be unallocated;
! psb_size returns 0 for an unallocated array.
if (psb_size(aux_b,dim=1) == m_problem) then
! if an rhs was present, use its first column
write(psb_err_unit,'("Ok, got an rhs ")')
b_col_glob => aux_b(:,1)
call psb_gelp('N',perm(1:m_problem),&
& b_col_glob(1:m_problem),info)
else
write(psb_out_unit,'("Generating an rhs...")')
write(psb_out_unit,'(" ")')
call psb_realloc(m_problem,1,aux_b,ircode)
if (ircode /= 0) then
call psb_errpush(psb_err_alloc_dealloc_,name)
goto 9999
endif
b_col_glob => aux_b(:,1)
do i=1, m_problem
b_col_glob(i) = done
enddo
endif
else
call psb_bcast(ictxt,m_problem)
end if
! switch over different partition types
select case(psb_toupper(part))
case('BLOCK')
if (me == psb_root_) write(psb_out_unit,'("Partition type: block")')
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,parts=part_block)
! call desc_a%cd_get_list
! call desc_a%cd_v_get_list
! print *, "desc_a%v_halo_index", desc_a%v_get_list()
case('GRAPH')
if (me == psb_root_) then
write(psb_out_unit,'("Partition type: graph vector")')
write(psb_out_unit,'(" ")')
! write(psb_err_unit,'("Build type: graph")')
call build_mtpart(aux_a,np)
endif
call psb_barrier(ictxt)
call distr_mtpart(psb_root_,ictxt)
call getv_mtpart(ivg)
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,v=ivg)
case default
if (me == psb_root_) write(psb_out_unit,'("Partition type: block")')
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,parts=part_block)
end select
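! psb_matdist distributes aux_a according to the chosen partition and
! builds the descriptor desc_a; the halo index lists recorded in desc_a
! are what the exchange below exercises.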
! call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_)
call psb_geall(x_col,desc_a,info)
call x_col%zero() ! change to mpi rank? Artless
!
nrow=desc_a%get_local_rows() ! number of locally owned rows
ncol=desc_a%get_local_cols() ! owned rows plus halo entries
iv = [ (i,i=1,ncol)]
! call psb_loc_to_glob(iv,desc_a,info) !
call desc_a%l2gv1(iv,info)
xa = iv
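! iv now holds the global index of every local and halo entry
! (l2gv1 converts local indices to global in place); xa is initialized
! from it, so after a halo exchange xa(i) should again equal iv(i)
! for every halo index i.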
! call desc_a%locgv1(iv,info)
! ---- test where halo regions are set to -1, sending 10+me
! xa(nrow+1:ncol) = -1 ! (10 + me) * -1
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! ! do i=0,np
! ! if (me .eq. i) then
! ! print *, "======================="
! ! print *, "me = ", me, "iv ="
! ! print '(225 I4)', iv
! ! print *, "-----------------------"
! ! end if
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! ! end do
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! ! do i=0,np
! ! if (me .eq. i) then
! ! print *, "======================="
! ! print *, "me = ", me, "xa ="
! ! print '(225 F4.0)', xa
! ! print *, "-----------------------"
! ! end if
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! ! end do
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! call x_col%set_vect(xa)
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! call psb_halo(x_col,desc_a,info,mode=psb_swap_persistent_)
! xa = x_col%get_vect()
! do i=nrow+1,ncol
! if (xa(i) /= iv(i)) then ! ha
! write(0,*) me, ': MISMATCH i=',i,"xa(i)=",xa(i),"iv(i)",iv(i)
! ! goto 9999
! end if
! end do
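! ITERATIONS and SWAPCHOICE are not declared here; they are presumably
! substituted by the preprocessor at build time (e.g. -DITERATIONS=<n>,
! -DSWAPCHOICE=<psb_swap_*_ constant>).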
num_iterations = ITERATIONS
swap_mode = SWAPCHOICE
swap_persistent = iand(swap_mode,psb_swap_persistent_) /= 0
swap_nonpersistent = iand(swap_mode,psb_swap_nonpersistent_) /= 0
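! Benchmark loop: each iteration resets the halo entries to -1, performs
! the halo exchange with the selected swap mode, accumulates its elapsed
! time, and checks every halo entry against the expected global index.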
halo_time = 0
do iter = 1, num_iterations
xa = iv
xa(nrow+1:ncol) = -1
call x_col%set_vect(xa)
call MPI_Barrier(MPI_COMM_WORLD,ierr)
halo_t_start = MPI_Wtime()
call psb_halo(x_col,desc_a,info,mode=swap_mode)
halo_t_end = MPI_Wtime() - halo_t_start
call MPI_Barrier(MPI_COMM_WORLD,ierr)
halo_time = halo_time + halo_t_end
xa = x_col%get_vect()
do i=nrow+1,ncol
if (xa(i) /= iv(i)) then
write(0,*) me, ': MISMATCH i=',i,' xa(i)=',xa(i),' iv(i)=',iv(i)
goto 9999
end if
end do
end do
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! do i=0,np
! if (me .eq. i) then
! print *, "======================="
! print *, "me = ", me, " xa after halo"
! print '(225 F4.0)', xa
! print *, "me = ", me, " iv after halo"
! print '(225 I4)', iv
! print *, "-----------------------"
! end if
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! end do
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
! goto 9999
call MPI_Barrier(MPI_COMM_WORLD, ierr)
call flush(output_unit)
call MPI_Barrier(MPI_COMM_WORLD, ierr)
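! The statistics below are read from the vector's internal data (x_col%v%p),
! which in this branch is assumed to carry the persistent-communication
! bookkeeping (timers, send/receive counts, neighbor lists), and are
! reduced to the root process for reporting.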
if (allocated(x_col%v)) then
if (allocated(x_col%v%p)) then
! -- collect times ---
! halo time
call MPI_Reduce(halo_time, sum_halo_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
! total time
call MPI_Reduce(x_col%v%p%total_time, total_time, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
if (swap_persistent .or. swap_nonpersistent) then
! collect communication times
call median(x_col%v%p%alltoall_comm_time_a(1:x_col%v%p%comm_count), &
median_comm_t, min_comm_t, max_comm_t)
call MPI_Reduce(median_comm_t, sum_median_comm_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(min_comm_t, sum_min_comm_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(max_comm_t, sum_max_comm_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(x_col%v%p%alltoall_comm_time, alltoall_comm_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
end if
if (swap_persistent) then
! collect MPIX request initialization time
call MPI_Reduce(x_col%v%p%request_create_time, request_create_t, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
psb_root_, MPI_COMM_WORLD, ierr)
end if
! collect min, max, and average snd/rcv elements
tot_snd = sum(x_col%v%p%snd_counts)
call MPI_Reduce(tot_snd, sum_tot_snd, 1, MPI_INTEGER, MPI_SUM, &
psb_root_, MPI_COMM_WORLD, ierr)
ave_tot_snd = real(sum_tot_snd,psb_spk_) / np
ave_snd = real(tot_snd) / real(x_col%v%p%snd_count)
call MPI_Reduce(ave_snd, ave_snd_buf, 1, MPI_REAL, &
MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
ave_snd_buf = ave_snd_buf / np
print *, "snd_counts = ", x_col%v%p%snd_counts
call MPI_Reduce(minval(x_col%v%p%snd_counts), min_snd, 1, MPI_INTEGER, MPI_MIN, &
psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(maxval(x_col%v%p%snd_counts), max_snd, 1, MPI_INTEGER, MPI_MAX, &
psb_root_, MPI_COMM_WORLD, ierr)
! collect min, max, and average rcv elements
tot_rcv = sum(x_col%v%p%rcv_counts)
call MPI_Reduce(tot_rcv, sum_tot_rcv, 1, MPI_INTEGER, MPI_SUM, &
psb_root_, MPI_COMM_WORLD, ierr)
ave_tot_rcv = real(sum_tot_rcv,psb_spk_) / np
ave_rcv = real(tot_rcv) / real(x_col%v%p%rcv_count)
call MPI_Reduce(ave_rcv, ave_rcv_buf, 1, MPI_REAL, &
MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
ave_rcv_buf = ave_rcv_buf / np
call MPI_Reduce(minval(x_col%v%p%rcv_counts), min_rcv, 1, MPI_INTEGER, MPI_MIN, &
psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(maxval(x_col%v%p%rcv_counts), max_rcv, 1, MPI_INTEGER, MPI_MAX, &
psb_root_, MPI_COMM_WORLD, ierr)
! collect min, max, and average number of neighbors
num_neighbors = size(x_col%v%p%snd_to, dim=1)
call MPI_Reduce(num_neighbors, sum_neighbors, 1, MPI_INTEGER, MPI_SUM, &
psb_root_, MPI_COMM_WORLD, ierr)
ave_neighbors = REAL(sum_neighbors) / np
call MPI_Reduce(num_neighbors, min_neighbors, 1, MPI_INTEGER, MPI_MIN, &
psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(num_neighbors, max_neighbors, 1, MPI_INTEGER, MPI_MAX, &
psb_root_, MPI_COMM_WORLD, ierr)
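! The root process appends one semicolon-separated record per run to
! halo_output.txt, writing the header line only if the file does not
! exist yet; the fields match the data row written further below.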
if (me == psb_root_) then
inquire(file='halo_output.txt', exist=file_exists)
if (.not. file_exists) then
open(1,file='halo_output.txt')
write(1,'(A)',advance='no') "partition;"
write(1,'(A)',advance='no') "np;"
write(1,'(A)',advance='no') "num_iterations;"
write(1,'(A)',advance='no') "swap_mode;"
write(1,'(A)',advance='no') "ave_halo_t_pi;"
! write(1,'(A)',advance='no') "total_time;"
! write(1,'(A)',advance='no') "ave_time_pp;"
write(1,'(A)',advance='no') "ave_time_pi;"
write(1,'(A)',advance='no') "alltoall_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_median_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_max_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_min_comm_t;"
write(1,'(A)',advance='no') "request_create_t;"
write(1,'(A)',advance='no') "ave_request_create_t;"
write(1,'(A)',advance='no') "ave_neighbors;"
write(1,'(A)',advance='no') "min_neighbors;"
write(1,'(A)',advance='no') "max_neighbors;"
write(1,'(A)',advance='no') "ave_tot_snd;"
write(1,'(A)',advance='no') "ave_tot_rcv;"
write(1,'(A)',advance='no') "ave_snd_buf;"
write(1,'(A)',advance='no') "ave_rcv_buf;"
write(1,'(A)',advance='no') "max_snd;"
write(1,'(A)',advance='no') "max_rcv;"
write(1,'(A)',advance='no') "min_snd;"
write(1,'(A)',advance='yes')"min_rcv;"
else
open(1,file='halo_output.txt', position='append')
end if
! write(1,*) "flt type max_buf[B] visits time[s] time[%] time/visit[us] region"
! write(1,*) ""
! convert accumulated times to microseconds
total_time = total_time * 1000000
ave_time_pp = total_time / np
ave_time_pi = total_time / (np * num_iterations)
sum_halo_t = sum_halo_t * 1000000
ave_halo_t_pi = sum_halo_t / (np * num_iterations)
alltoall_comm_t = alltoall_comm_t * 1000000
ave_alltoall_comm_t = alltoall_comm_t / (np * num_iterations)
sum_median_comm_t = sum_median_comm_t * 1000000
ave_alltoall_median_comm_t = sum_median_comm_t / np
sum_max_comm_t = sum_max_comm_t * 1000000
ave_alltoall_max_comm_t = sum_max_comm_t / np
sum_min_comm_t = sum_min_comm_t * 1000000
ave_alltoall_min_comm_t = sum_min_comm_t / np
! print *, "alltoall", alltoall_comm_t, ave_alltoall_comm_t
if (swap_persistent) then
ave_request_create_t = (request_create_t / np) * 1000000
request_create_t = request_create_t * 1000000
else
! request creation only applies to the persistent mode; use -1 as a sentinel
ave_request_create_t = -1.0
request_create_t = -1.0
end if
! write(1,*, advance='no') 37 65 73 64
write(1,'(A5,A1)',advance='no') psb_toupper(part), ';'
write(1,'(I5,A1)',advance='no') np, ';'
write(1,'(I6,A1)',advance='no') num_iterations, ';'
write(1,'(I5,A1)',advance='no') swap_mode, ';'
write(1,'(F20.4,A1)',advance='no') ave_halo_t_pi, ';'
! write(1,'(F20.4,A1)',advance='no') total_time, ';'
! write(1,'(F20.4,A1)',advance='no') ave_time_pp, ';'
write(1,'(F20.4,A1)',advance='no') ave_time_pi, ';'
write(1,'(F20.4,A1)',advance='no') alltoall_comm_t, ';'
write(1,'(F20.4,A1)',advance='no') ave_alltoall_comm_t, ';'
write(1,'(F20.4,A1)',advance='no') ave_alltoall_median_comm_t, ';'
write(1,'(F20.4,A1)',advance='no') ave_alltoall_max_comm_t, ';'
write(1,'(F20.4,A1)',advance='no') ave_alltoall_min_comm_t, ';'
write(1,'(F10.2,A1)',advance='no') request_create_t, ';'
write(1,'(F10.2,A1)',advance='no') ave_request_create_t, ';'
write(1,'(F10.2,A1)',advance='no') ave_neighbors, ';'
write(1,'(I5,A1)',advance='no') min_neighbors, ';'
write(1,'(I5,A1)',advance='no') max_neighbors, ';'
write(1,'(F15.2,A1)',advance='no') ave_tot_snd, ';'
write(1,'(F15.2,A1)',advance='no') ave_tot_rcv, ';'
write(1,'(F15.2,A1)',advance='no') ave_snd_buf, ';'
write(1,'(F15.2,A1)',advance='no') ave_rcv_buf, ';'
write(1,'(I7,A1)',advance='no') max_snd, ';'
write(1,'(I7,A1)',advance='no') max_rcv, ';'
write(1,'(I7,A1)',advance='no') min_snd, ';'
write(1,'(I7)',advance='no') min_rcv
! write(1,'(F9.2 A1)',advance='no') , ';'
! write(1,'(I5 A1)',advance='no') , ';'
write(1,'()')
! ---- need to write buffer size, min and average and max snd/rcv elements
! write(1,())
! ---- need to write buffer size, min and average and max neighbors
close(1)
end if
end if
end if
call flush(output_unit)
call MPI_Barrier(MPI_COMM_WORLD, ierr)
if (me == psb_root_) then
print *, "did halo communication", num_iterations, "times"
end if
998 format(i8,4(2x,g20.14))
993 format(i6,4(1x,e12.6))
! print *, "* calling free funcs"
! call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info)
call psb_spfree(a, desc_a,info)
call prec%free(info)
call psb_cdfree(desc_a,info)
call psb_exit(ictxt)
if (me .eq. 0) then
print *, "--HALO TEST FIN--"
end if
stop
9999 call psb_error(ictxt)
stop
contains
! simple in-place insertion sort, ascending order
subroutine sort(n, a)
implicit none
integer, intent(in) :: n
real(psb_dpk_), intent(inout) :: a(n)
integer :: i, j
real(psb_dpk_) :: x
do i = 2, n
x = a(i)
j = i - 1
! shift larger elements to the right, then insert x
do while (j >= 1)
if (a(j) <= x) exit
a(j + 1) = a(j)
j = j - 1
end do
a(j + 1) = x
end do
end subroutine sort
! sort a copy of a and return its median, minimum and maximum
subroutine median(a, median_val, min, max)
implicit none
real(psb_dpk_), dimension(:), intent(in) :: a
real(psb_dpk_), intent(out) :: median_val, min, max
integer :: l
real(psb_dpk_), dimension(size(a,1)) :: ac
ac = a
l = size(a,1)
call sort(l, ac)
if ( mod(l, 2) == 0 ) then
median_val = (ac(l/2+1) + ac(l/2))/2.0_psb_dpk_
else
median_val = ac(l/2+1)
end if
min = ac(1)
max = ac(l)
end subroutine median
end program psb_df_sample