You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
559 lines
20 KiB
Fortran
559 lines
20 KiB
Fortran
|
|
!
|
|
! Parallel Sparse BLAS version 3.5
|
|
! (C) Copyright 2006-2018
|
|
! Salvatore Filippone
|
|
! Alfredo Buttari
|
|
!
|
|
! Redistribution and use in source and binary forms, with or without
|
|
! modification, are permitted provided that the following conditions
|
|
! are met:
|
|
! 1. Redistributions of source code must retain the above copyright
|
|
! notice, this list of conditions and the following disclaimer.
|
|
! 2. Redistributions in binary form must reproduce the above copyright
|
|
! notice, this list of conditions, and the following disclaimer in the
|
|
! documentation and/or other materials provided with the distribution.
|
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
|
! not be used to endorse or promote products derived from this
|
|
! software without specific written permission.
|
|
!
|
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
! POSSIBILITY OF SUCH DAMAGE.
|
|
!
|
|
!
|
|
|
|
|
|
program psb_df_sample
|
|
use psb_base_mod
|
|
use psb_prec_mod
|
|
use psb_krylov_mod
|
|
use psb_util_mod
|
|
use getp
|
|
use mpi
|
|
implicit none
|
|
|
|
! input parameters
|
|
character(len=40) :: kmethd, ptype, mtrx_file, rhs_file,renum
|
|
|
|
! sparse matrices
|
|
type(psb_dspmat_type) :: a, aux_a
|
|
|
|
! preconditioner data
|
|
type(psb_dprec_type) :: prec
|
|
|
|
! dense matrices
|
|
real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
|
|
real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:)
|
|
real(psb_dpk_), pointer :: b_col_glob(:)
|
|
type(psb_d_vect_type) :: b_col, x_col, r_col
|
|
|
|
! communications data structure
|
|
type(psb_desc_type):: desc_a
|
|
|
|
integer(psb_ipk_) :: ictxt, me, np
|
|
|
|
! solver paramters
|
|
integer(psb_ipk_) :: iter, itmax, ierr, itrace, ircode,&
|
|
& methd, istopc, irst
|
|
integer(psb_epk_) :: amatsize, precsize, descsize
|
|
real(psb_dpk_) :: err, eps, cond
|
|
|
|
character(len=5) :: afmt
|
|
character(len=20) :: name, part
|
|
character(len=2) :: filefmt
|
|
integer(psb_ipk_), parameter :: iunit=12
|
|
integer(psb_ipk_) :: iparm(20)
|
|
|
|
! other variables
|
|
integer(psb_ipk_) :: i,info,j,m_problem, err_act
|
|
integer(psb_ipk_) :: internal, m,ii,nnzero
|
|
real(psb_dpk_) :: t1, t2, tprec, t_low, t_high, ave_request_create_t, request_create_t
|
|
real(psb_dpk_) :: r_amax, b_amax, scale,resmx,resmxp
|
|
integer(psb_ipk_) :: nrhs, nrow, n_row, dim, ne, nv, ncol
|
|
integer(psb_ipk_), allocatable :: ivg(:), perm(:)
|
|
integer(psb_ipk_), allocatable :: ipv(:)
|
|
character(len=40) :: fname, fnout
|
|
|
|
! persistent communication
|
|
integer(psb_lpk_), allocatable :: iv(:)
|
|
real(psb_dpk_), allocatable :: xa(:)
|
|
integer(psb_ipk_) :: num_iterations, num_neighbors, &
|
|
min_neighbors, max_neighbors, &
|
|
sum_neighbors
|
|
integer(psb_ipk_) :: sum_snd, min_snd, max_snd, num_snd, tot_snd, sum_tot_snd
|
|
integer(psb_ipk_) :: sum_rcv, min_rcv, max_rcv, num_rcv, tot_rcv, sum_tot_rcv
|
|
real(psb_spk_) :: ave_neighbors, ave_snd_buf, ave_rcv_buf, ave_tot_snd, ave_tot_rcv
|
|
real(psb_dpk_) :: alltoall_comm_t, ave_alltoall_comm_t, total_time, &
|
|
ave_time_pp, ave_time_pi
|
|
real(psb_dpk_) :: median_comm_t, ave_alltoall_median_comm_t, min_comm_t, max_comm_t, &
|
|
ave_min_comm_t, ave_max_comm_t, sum_median_comm_t, sum_min_comm_t, sum_max_comm_t, &
|
|
ave_alltoall_max_comm_t, ave_alltoall_min_comm_t
|
|
real(psb_dpk_) :: halo_t_start, halo_t_end, ave_halo_t_pi, &
|
|
sum_halo_t, halo_time
|
|
real(psb_spk_) :: ave_snd, ave_rcv
|
|
integer(psb_ipk_) :: swap_mode
|
|
logical :: swap_persistent, swap_nonpersistent, file_exists
|
|
|
|
call psb_init(ictxt)
|
|
call psb_info(ictxt,me,np)
|
|
|
|
if (me < 0) then
|
|
! This should not happen, but just in case
|
|
call psb_exit(ictxt)
|
|
stop
|
|
endif
|
|
|
|
|
|
name='psb_df_sample'
|
|
if(psb_errstatus_fatal()) goto 9999
|
|
info=psb_success_
|
|
call psb_set_errverbosity(itwo)
|
|
!
|
|
! Hello world
|
|
!
|
|
if (me == psb_root_) then
|
|
write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
|
|
write(*,*) 'This is the ',trim(name),' sample program'
|
|
end if
|
|
!
|
|
! get parameters
|
|
!
|
|
call get_parms(ictxt,mtrx_file,rhs_file,filefmt,kmethd,ptype,&
|
|
& part,afmt,istopc,itmax,itrace,irst,eps)
|
|
|
|
call psb_barrier(ictxt)
|
|
t1 = psb_wtime()
|
|
! read the input matrix to be processed and (possibly) the rhs
|
|
nrhs = 1
|
|
|
|
if (me == psb_root_) then
|
|
select case(psb_toupper(filefmt))
|
|
case('MM')
|
|
! For Matrix Market we have an input file for the matrix
|
|
! and an (optional) second file for the RHS.
|
|
call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
|
|
if (info == psb_success_) then
|
|
if (rhs_file /= 'NONE') then
|
|
call mm_array_read(aux_b,info,iunit=iunit,filename=rhs_file)
|
|
end if
|
|
end if
|
|
|
|
case ('HB')
|
|
! For Harwell-Boeing we have a single file which may or may not
|
|
! contain an RHS.
|
|
call hb_read(aux_a,info,iunit=iunit,b=aux_b,filename=mtrx_file)
|
|
|
|
case default
|
|
info = -1
|
|
write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
|
|
end select
|
|
if (info /= psb_success_) then
|
|
write(psb_err_unit,*) 'Error while reading input matrix '
|
|
call psb_abort(ictxt)
|
|
end if
|
|
|
|
m_problem = aux_a%get_nrows()
|
|
call psb_bcast(ictxt,m_problem)
|
|
call psb_mat_renum(psb_mat_renum_identity_,aux_a,info,perm)
|
|
|
|
! At this point aux_b may still be unallocated
|
|
if (size(aux_b,dim=1) == m_problem) then
|
|
! if any rhs were present, broadcast the first one
|
|
write(psb_err_unit,'("Ok, got an rhs ")')
|
|
b_col_glob =>aux_b(:,1)
|
|
call psb_gelp('N',perm(1:m_problem),&
|
|
& b_col_glob(1:m_problem),info)
|
|
else
|
|
write(psb_out_unit,'("Generating an rhs...")')
|
|
write(psb_out_unit,'(" ")')
|
|
call psb_realloc(m_problem,1,aux_b,ircode)
|
|
if (ircode /= 0) then
|
|
call psb_errpush(psb_err_alloc_dealloc_,name)
|
|
goto 9999
|
|
endif
|
|
|
|
b_col_glob => aux_b(:,1)
|
|
do i=1, m_problem
|
|
b_col_glob(i) = done
|
|
enddo
|
|
endif
|
|
|
|
else
|
|
call psb_bcast(ictxt,m_problem)
|
|
|
|
end if
|
|
|
|
! switch over different partition types
|
|
select case(psb_toupper(part))
|
|
case('BLOCK')
|
|
if (me == psb_root_) write(psb_out_unit,'("Partition type: block")')
|
|
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,parts=part_block)
|
|
! call desc_a%cd_get_list
|
|
! call desc_a%cd_v_get_list
|
|
! print *, "desc_a%v_halo_index", desc_a%v_get_list()
|
|
|
|
|
|
case('GRAPH')
|
|
if (me == psb_root_) then
|
|
write(psb_out_unit,'("Partition type: graph vector")')
|
|
write(psb_out_unit,'(" ")')
|
|
! write(psb_err_unit,'("Build type: graph")')
|
|
call build_mtpart(aux_a,np)
|
|
|
|
endif
|
|
call psb_barrier(ictxt)
|
|
call distr_mtpart(psb_root_,ictxt)
|
|
call getv_mtpart(ivg)
|
|
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,v=ivg)
|
|
|
|
case default
|
|
if (me == psb_root_) write(psb_out_unit,'("Partition type: block")')
|
|
call psb_matdist(aux_a, a, ictxt,desc_a,info,fmt=afmt,parts=part_block)
|
|
|
|
end select
|
|
|
|
! call psb_scatter(b_col_glob,b_col,desc_a,info,root=psb_root_)
|
|
call psb_geall(x_col,desc_a,info)
|
|
|
|
call x_col%zero() ! change to mpi rank? Artless
|
|
!
|
|
nrow=desc_a%get_local_rows() ! number of rows
|
|
ncol=desc_a%get_local_cols() ! number of columns
|
|
iv = [ (i,i=1,ncol)]
|
|
|
|
! call psb_loc_to_glob(iv,desc_a,info) !
|
|
call desc_a%l2gv1(iv,info)
|
|
xa = iv
|
|
! call desc_a%locgv1(iv,info)
|
|
|
|
! ---- test where halo regions are set to -1, sending 10+me
|
|
|
|
! xa(nrow+1:ncol) = -1 ! (10 + me) * -1
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! ! do i=0,np
|
|
! ! if (me .eq. i) then
|
|
! ! print *, "======================="
|
|
! ! print *, "me = ", me, "iv ="
|
|
! ! print '(225 I4)', iv
|
|
! ! print *, "-----------------------"
|
|
! ! end if
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! ! end do
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! ! do i=0,np
|
|
! ! if (me .eq. i) then
|
|
! ! print *, "======================="
|
|
! ! print *, "me = ", me, "xa ="
|
|
! ! print '(225 F4.0)', xa
|
|
! ! print *, "-----------------------"
|
|
! ! end if
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! ! end do
|
|
! ! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
|
|
! call x_col%set_vect(xa)
|
|
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! call psb_halo(x_col,desc_a,info,mode=psb_swap_persistent_)
|
|
|
|
! xa = x_col%get_vect()
|
|
! do i=nrow+1,ncol
|
|
! if (xa(i) /= iv(i)) then ! ha
|
|
! write(0,*) me, ': MISMATCH i=',i,"xa(i)=",xa(i),"iv(i)",iv(i)
|
|
! ! goto 9999
|
|
! end if
|
|
! end do
|
|
num_iterations = ITERATIONS
|
|
swap_mode = SWAPCHOICE
|
|
swap_persistent = iand(swap_mode,psb_swap_persistent_) /= 0
|
|
swap_nonpersistent = iand(swap_mode,psb_swap_nonpersistent_) /= 0
|
|
halo_time = 0
|
|
do iter = 1, num_iterations
|
|
xa = iv
|
|
xa(nrow+1:ncol) = -1
|
|
call x_col%set_vect(xa)
|
|
call MPI_Barrier(MPI_COMM_WORLD,ierr)
|
|
halo_t_start = MPI_Wtime()
|
|
call psb_halo(x_col,desc_a,info,mode=swap_mode)
|
|
halo_t_end = MPI_Wtime() - halo_t_start
|
|
call MPI_Barrier(MPI_COMM_WORLD,ierr)
|
|
halo_time = halo_time + halo_t_end
|
|
|
|
xa = x_col%get_vect()
|
|
|
|
do i=nrow+1,ncol
|
|
if (xa(i) /= iv(i)) then ! ha
|
|
write(0,*) me, ': MISMATCH i=',i,"xa(i)=",xa(i),"iv(i)",iv(i)
|
|
goto 9999
|
|
end if
|
|
end do
|
|
end do
|
|
|
|
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! do i=0,np
|
|
! if (me .eq. i) then
|
|
! print *, "======================="
|
|
! print *, "me = ", me, " xa after halo"
|
|
! print '(225 F4.0)', xa
|
|
! print *, "me = ", me, " iv after halo"
|
|
! print '(225 I4)', iv
|
|
! print *, "-----------------------"
|
|
! end if
|
|
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! end do
|
|
! call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
! goto 9999
|
|
|
|
call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
call flush(output_unit)
|
|
call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
if (allocated(x_col%v)) then
|
|
if (allocated(x_col%v%p)) then
|
|
! -- collect times ---
|
|
! halo time
|
|
call MPI_Reduce(halo_time, sum_halo_t, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
|
|
! total time
|
|
call MPI_Reduce(x_col%v%p%total_time, total_time, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
|
|
if (swap_persistent .or. swap_nonpersistent) then
|
|
! collect communication times
|
|
call median(x_col%v%p%alltoall_comm_time_a(1:x_col%v%p%comm_count), &
|
|
median_comm_t, min_comm_t, max_comm_t)
|
|
|
|
call MPI_Reduce(median_comm_t, sum_median_comm_t, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
call MPI_Reduce(min_comm_t, sum_min_comm_t, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
call MPI_Reduce(max_comm_t, sum_max_comm_t, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
|
|
call MPI_Reduce(x_col%v%p%alltoall_comm_time, alltoall_comm_t, 1, &
|
|
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
end if
|
|
if (swap_persistent) then
|
|
! collect MPIX request initialization time
|
|
call MPI_Reduce(x_col%v%p%request_create_time, request_create_t, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
end if
|
|
! collect min, max, and average snd/rcv elements
|
|
tot_snd = sum(x_col%v%p%snd_counts)
|
|
call MPI_Reduce(tot_snd, sum_tot_snd, 1, MPI_INTEGER, MPI_SUM, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
ave_tot_snd = sum_tot_snd / np
|
|
ave_snd = real(tot_snd) / real(x_col%v%p%snd_count)
|
|
call MPI_Reduce(ave_snd, ave_snd_buf, 1, MPI_REAL, &
|
|
MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
ave_snd_buf = ave_snd_buf / np
|
|
print *, "snd_counts = ", x_col%v%p%snd_counts
|
|
call MPI_Reduce(minval(x_col%v%p%snd_counts), min_snd, 1, MPI_INTEGER, MPI_MIN, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
call MPI_Reduce(maxval(x_col%v%p%snd_counts), max_snd, 1, MPI_INTEGER, MPI_MAX, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
! collect min, max, and average rcv elements
|
|
tot_rcv = sum(x_col%v%p%rcv_counts)
|
|
call MPI_Reduce(tot_rcv, sum_tot_rcv, 1, MPI_INTEGER, MPI_SUM, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
ave_tot_rcv = sum_tot_rcv / np
|
|
ave_rcv = real(tot_rcv) / real(x_col%v%p%rcv_count)
|
|
call MPI_Reduce(ave_rcv, ave_rcv_buf, 1, MPI_REAL, &
|
|
MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
|
|
ave_rcv_buf = ave_rcv_buf / np
|
|
call MPI_Reduce(minval(x_col%v%p%rcv_counts), min_rcv, 1, MPI_INTEGER, MPI_MIN, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
call MPI_Reduce(maxval(x_col%v%p%rcv_counts), max_rcv, 1, MPI_INTEGER, MPI_MAX, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
|
|
! collect min, max, and average number of neighbors
|
|
num_neighbors = size(x_col%v%p%snd_to, dim=1)
|
|
call MPI_Reduce(num_neighbors, sum_neighbors, 1, MPI_INTEGER, MPI_SUM, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
ave_neighbors = REAL(sum_neighbors) / np
|
|
call MPI_Reduce(num_neighbors, min_neighbors, 1, MPI_INTEGER, MPI_MIN, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
call MPI_Reduce(num_neighbors, max_neighbors, 1, MPI_INTEGER, MPI_MAX, &
|
|
psb_root_, MPI_COMM_WORLD, ierr)
|
|
|
|
if (me == psb_root_) then
|
|
inquire(file='halo_output.txt', exist=file_exists)
|
|
if (.not. file_exists) then
|
|
open(1,file='halo_output.txt')
|
|
write(1,'(A)',advance='no') "partition;"
|
|
write(1,'(A)',advance='no') "np;"
|
|
write(1,'(A)',advance='no') "num_iterations;"
|
|
write(1,'(A)',advance='no') "swap_mode;"
|
|
write(1,'(A)',advance='no') "ave_halo_t_pi;"
|
|
! write(1,'(A)',advance='no') "total_time;"
|
|
! write(1,'(A)',advance='no') "ave_time_pp;"
|
|
write(1,'(A)',advance='no') "ave_time_pi;"
|
|
write(1,'(A)',advance='no') "alltoall_comm_t;"
|
|
write(1,'(A)',advance='no') "ave_alltoall_comm_t;"
|
|
write(1,'(A)',advance='no') "ave_alltoall_median_comm_t;"
|
|
write(1,'(A)',advance='no') "ave_alltoall_max_comm_t;"
|
|
write(1,'(A)',advance='no') "ave_alltoall_min_comm_t;"
|
|
write(1,'(A)',advance='no') "request_create_t;"
|
|
write(1,'(A)',advance='no') "ave_request_create_t;"
|
|
write(1,'(A)',advance='no') "ave_neighbors;"
|
|
|
|
write(1,'(A)',advance='no') "min_neighbors;"
|
|
write(1,'(A)',advance='no') "max_neighbors;"
|
|
write(1,'(A)',advance='no') "ave_tot_snd;"
|
|
write(1,'(A)',advance='no') "ave_tot_rcv;"
|
|
write(1,'(A)',advance='no') "ave_snd_buf;"
|
|
write(1,'(A)',advance='no') "ave_rcv_buf;"
|
|
write(1,'(A)',advance='no') "max_snd;"
|
|
write(1,'(A)',advance='no') "max_rcv;"
|
|
write(1,'(A)',advance='no') "min_snd;"
|
|
write(1,'(A)',advance='yes')"min_rcv;"
|
|
else
|
|
open(1,file='halo_output.txt', access='APPEND')
|
|
end if
|
|
|
|
|
|
! write(1,*) "flt type max_buf[B] visits time[s] time[%] time/visit[us] region"
|
|
! write(1,*) ""
|
|
! converting microseconds
|
|
total_time = total_time * 1000000
|
|
ave_time_pp = total_time / np
|
|
ave_time_pi = total_time / (np * num_iterations)
|
|
sum_halo_t = sum_halo_t * 1000000
|
|
ave_halo_t_pi = sum_halo_t / (np * num_iterations)
|
|
alltoall_comm_t = alltoall_comm_t * 1000000
|
|
ave_alltoall_comm_t = alltoall_comm_t / (np * num_iterations)
|
|
sum_median_comm_t = sum_median_comm_t * 1000000
|
|
ave_alltoall_median_comm_t = sum_median_comm_t / np
|
|
sum_max_comm_t = sum_max_comm_t * 1000000
|
|
ave_alltoall_max_comm_t = sum_max_comm_t / np
|
|
sum_min_comm_t = sum_min_comm_t * 1000000
|
|
ave_alltoall_min_comm_t = sum_min_comm_t / np
|
|
|
|
! print *, "alltoall", alltoall_comm_t, ave_alltoall_comm_t
|
|
if (swap_persistent) then
|
|
ave_request_create_t = (request_create_t / np) * 1000000
|
|
request_create_t = request_create_t * 1000000
|
|
else if (swap_nonpersistent) then
|
|
ave_request_create_t = -1.0
|
|
request_create_t = -1.0
|
|
end if
|
|
|
|
! write(1,*, advance='no') 37 65 73 64
|
|
|
|
write(1,'(A5 A1)',advance='no') psb_toupper(part), ';'
|
|
write(1,'(I5 A1)',advance='no') np, ';'
|
|
write(1,'(I6 A1)',advance='no') num_iterations, ';'
|
|
write(1,'(I5 A1)' ,advance='no') swap_mode, ';'
|
|
write(1,'(I5 A1)' ,advance='no') ave_halo_t_pi, ';'
|
|
! write(1,'(F20.4 A1)',advance='no') total_time, ';'
|
|
! write(1,'(F20.4 A1)',advance='no') ave_time_pp, ';'
|
|
write(1,'(F20.4 A1)',advance='no') ave_time_pi, ';'
|
|
write(1,'(F20.4 A1)',advance='no') alltoall_comm_t, ';'
|
|
write(1,'(F20.4 A1)',advance='no') ave_alltoall_comm_t, ';'
|
|
write(1,'(F20.4 A1)',advance='no') ave_alltoall_median_comm_t, ';'
|
|
write(1,'(F20.4 A1)',advance='no') ave_alltoall_max_comm_t, ';'
|
|
write(1,'(F20.4 A1)',advance='no') ave_alltoall_min_comm_t, ';'
|
|
write(1,'(F10.2 A1)',advance='no') request_create_t, ';'
|
|
write(1,'(F10.2 A1)',advance='no') ave_request_create_t, ';'
|
|
write(1,'(F10.2 A1)',advance='no') ave_neighbors, ';'
|
|
|
|
write(1,'(I5 A1)' ,advance='no') min_neighbors, ';'
|
|
write(1,'(I5 A1)' ,advance='no') max_neighbors, ';'
|
|
write(1,'(F15.2 A1)',advance='no') ave_tot_snd, ';'
|
|
write(1,'(F15.2 A1)',advance='no') ave_tot_rcv, ';'
|
|
write(1,'(F15.2 A1)',advance='no') ave_snd_buf, ';'
|
|
write(1,'(F15.2 A1)',advance='no') ave_rcv_buf, ';'
|
|
write(1,'(I7 A1)',advance='no') max_snd, ';'
|
|
write(1,'(I7 A1)',advance='no') max_rcv, ';'
|
|
write(1,'(I7 A1)',advance='no') min_snd, ';'
|
|
write(1,'(I7 A1)',advance='no') min_rcv
|
|
|
|
! write(1,'(F9.2 A1)',advance='no') , ';'
|
|
! write(1,'(I5 A1)',advance='no') , ';'
|
|
write(1,'()')
|
|
|
|
! ---- need to write buffer size, min and average and max snd/rcv elements
|
|
! write(1,())
|
|
! ---- need to write buffer size, min and average and max neighbors
|
|
end if
|
|
end if
|
|
end if
|
|
call flush(output_unit)
|
|
call MPI_Barrier(MPI_COMM_WORLD, ierr)
|
|
if (me == psb_root_) then
|
|
print *, "did halo communication", num_iterations, "times"
|
|
end if
|
|
|
|
998 format(i8,4(2x,g20.14))
|
|
993 format(i6,4(1x,e12.6))
|
|
|
|
! print *, "* calling free funcs"
|
|
! call psb_gefree(b_col, desc_a,info)
|
|
call psb_gefree(x_col, desc_a,info)
|
|
call psb_spfree(a, desc_a,info)
|
|
call prec%free(info)
|
|
call psb_cdfree(desc_a,info)
|
|
call psb_exit(ictxt)
|
|
if (me .eq. 0) then
|
|
print *, "--HALO TEST FIN--"
|
|
end if
|
|
stop
|
|
|
|
9999 call psb_error(ictxt)
|
|
|
|
stop
|
|
|
|
|
|
contains
|
|
subroutine sort(n, a)
|
|
implicit none
|
|
integer :: n, i, j
|
|
real(psb_dpk_) :: a(n), x
|
|
|
|
do i = 2, n
|
|
x = a(i)
|
|
j = i - 1
|
|
do while (j >= 1)
|
|
if (a(j) <= x) exit
|
|
a(j + 1) = a(j)
|
|
j = j - 1
|
|
end do
|
|
a(j + 1) = x
|
|
end do
|
|
end subroutine sort
|
|
|
|
subroutine median(a, median_val, min, max)
|
|
real(psb_dpk_), dimension(:), intent(in) :: a
|
|
real(psb_dpk_), intent(out) :: median_val, min, max
|
|
|
|
integer :: l, u
|
|
real(psb_dpk_), dimension(size(a,1)) :: ac
|
|
|
|
ac = a
|
|
! this is not an intrinsic: peek a sort algo from
|
|
! Category:Sorting, fixing it to work with real if
|
|
! it uses integer instead.
|
|
l = size(a,1)
|
|
call sort(l, ac)
|
|
|
|
if ( mod(l, 2) == 0 ) then
|
|
median_val = (ac(l/2+1) + ac(l/2))/2.0
|
|
else
|
|
median_val = ac(l/2+1)
|
|
end if
|
|
min = ac(1)
|
|
max = ac(ubound(ac, dim=1))
|
|
u = ubound(ac, dim=1)
|
|
end subroutine median
|
|
end program psb_df_sample
|