scr-persistent-collective
Soren Rasmussen 6 years ago
parent cbc7a87acd
commit 8311c65d2f

@ -2,9 +2,9 @@ INSTALLDIR=../..
INCDIR=$(INSTALLDIR)/include/ INCDIR=$(INSTALLDIR)/include/
MODDIR=$(INSTALLDIR)/modules/ MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas include $(INCDIR)/Make.inc.psblas
P=4 P=8
debug= debug=
FFLAGS += -cpp -DITERATIONS=$(ITERATIONS) FFLAGS += -cpp -DITERATIONS=$(ITERATIONS) -DSWAPCHOICE=$(SWAPCHOICE)
FFLAGS += $(debug) FFLAGS += $(debug)
FCOPT += $(FFLAGS) FCOPT += $(FFLAGS)
scorep_vars=SCOREP_WRAPPER_COMPILER_FLAGS="-O2" \ scorep_vars=SCOREP_WRAPPER_COMPILER_FLAGS="-O2" \
@ -26,25 +26,25 @@ EXEDIR=./runs
all: psb_halo_df_test all: psb_halo_df_test
psb_halo_df_test: getp.o psb_halo_df_test.o psb_halo_df_test: getp.o psb_halo_df_test.o
$(scorep_opt) $(FLINK) $(LOPT) $^ -o halo_persistant_$(ITERATIONS)_iter $(PSBLAS_LIB) $(LDLIBS) $(scorep_opt) $(FLINK) $(LOPT) $^ -o halo_$(SWAPCHOICE)$(ITERATIONS)_iter $(PSBLAS_LIB) $(LDLIBS)
halo_test: getp.o psb_halo_df_test.o halo_test: getp.o psb_halo_df_test.o
$(scorep_opt) $(FLINK) $(LOPT) $^ -o halo_test $(PSBLAS_LIB) $(LDLIBS) $(scorep_opt) $(FLINK) $(LOPT) $^ -o halo_test $(PSBLAS_LIB) $(LDLIBS)
clean: clean:
rm -f *~ *.o psb_halo_df_test1 psb_halo_df_test halo_test \ rm -f *~ *.o psb_halo_df_test1 psb_halo_df_test halo_test \
halo_persistant_output.txt \
rm -rf test-results/* rm -rf test-results/*
/bin/rm -f $(DFOBJS) $(ZFOBJS) $(SFOBJS) $(CFOBJS)\ /bin/rm -f $(DFOBJS) $(ZFOBJS) $(SFOBJS) $(CFOBJS)\
*$(.mod) $(EXEDIR)/psb_*f_sample *$(.mod) $(EXEDIR)/psb_*f_sample
cleanall: clean cleanall: clean
rm -f halo_output.txt
rm -rf scorep-2019* rm -rf scorep-2019*
rm -rf scorep-failed-* rm -rf scorep-failed-*
score: score:
scorep-score -r $(scorep_dir)/profile.cubex | grep -e MPI -e 'type max_buf' scorep-score -r $(scorep_dir)/profile.cubex | grep -e MPI -e 'type max_buf'
run: psb_halo_df_test run: psb_halo_df_test
$(scorep_opt) mpirun -np $(P) ./psb_halo_df_test_$(ITERATIONS) matrices/ck104/ck104.inp $(scorep_opt) mpirun -np $(P) ./halo_$(SWAPCHOICE)_$(ITERATIONS)_iter matrices/ck104/ck104.inp
r: halo_test r: halo_test
$(scorep_opt) mpirun -np $(P) ./halo_test matrices/ck104/ck104.inp $(scorep_opt) mpirun -np $(P) ./halo_test matrices/ck104/ck104.inp

@ -3,7 +3,7 @@
## MPI submission script for PBS on DELTA ## MPI submission script for PBS on DELTA
## -------------------------------------- ## --------------------------------------
#PBS -N umbrella_corp #PBS -N umbrella_corp
#PBS -l select=1:ncpus=16:mpiprocs=16 #PBS -l select=2:ncpus=16:mpiprocs=16
## ##
## half_hour - 30 minutes ## half_hour - 30 minutes
## one_hour - 1 hour ## one_hour - 1 hour
@ -55,7 +55,7 @@ ml
echo $PATH echo $PATH
which mpirun which mpirun
echo "========= START =========" echo "========= START ========="
timeout 5m cpuburn # timeout 5m cpuburn
./run_tests.sh ./run_tests.sh
# echo "mpirun -machinefile hostfile -ppn 1 -np ${cpus} ${PROG} -v" # echo "mpirun -machinefile hostfile -ppn 1 -np ${cpus} ${PROG} -v"
# export OMP_NUM_THREADS=16 # export OMP_NUM_THREADS=16

@ -92,14 +92,16 @@ program psb_df_sample
integer(psb_ipk_) :: sum_snd, min_snd, max_snd, num_snd, tot_snd, sum_tot_snd integer(psb_ipk_) :: sum_snd, min_snd, max_snd, num_snd, tot_snd, sum_tot_snd
integer(psb_ipk_) :: sum_rcv, min_rcv, max_rcv, num_rcv, tot_rcv, sum_tot_rcv integer(psb_ipk_) :: sum_rcv, min_rcv, max_rcv, num_rcv, tot_rcv, sum_tot_rcv
real(psb_spk_) :: ave_neighbors, ave_snd_buf, ave_rcv_buf, ave_tot_snd, ave_tot_rcv real(psb_spk_) :: ave_neighbors, ave_snd_buf, ave_rcv_buf, ave_tot_snd, ave_tot_rcv
real(psb_dpk_) :: alltoall_comm_t, ave_alltoall_comm_t, total_time, ave_time real(psb_dpk_) :: alltoall_comm_t, ave_alltoall_comm_t, total_time, &
ave_time_pp, ave_time_pi
real(psb_dpk_) :: median_comm_t, ave_alltoall_median_comm_t, min_comm_t, max_comm_t, & real(psb_dpk_) :: median_comm_t, ave_alltoall_median_comm_t, min_comm_t, max_comm_t, &
ave_min_comm_t, ave_max_comm_t, sum_median_comm_t, sum_min_comm_t, sum_max_comm_t, & ave_min_comm_t, ave_max_comm_t, sum_median_comm_t, sum_min_comm_t, sum_max_comm_t, &
ave_alltoall_max_comm_t, ave_alltoall_min_comm_t ave_alltoall_max_comm_t, ave_alltoall_min_comm_t
real(psb_dpk_) :: halo_t_start, halo_t_end, ave_halo_t_pi, &
sum_halo_t, halo_time
real(psb_spk_) :: ave_snd, ave_rcv real(psb_spk_) :: ave_snd, ave_rcv
integer(psb_ipk_) :: swap_mode integer(psb_ipk_) :: swap_mode
logical :: swap_persistent, swap_nonpersistent logical :: swap_persistent, swap_nonpersistent, file_exists
logical, parameter :: openfile=OPENFILE
call psb_init(ictxt) call psb_init(ictxt)
call psb_info(ictxt,me,np) call psb_info(ictxt,me,np)
@ -275,12 +277,17 @@ program psb_df_sample
swap_mode = SWAPCHOICE swap_mode = SWAPCHOICE
swap_persistent = iand(swap_mode,psb_swap_persistent_) /= 0 swap_persistent = iand(swap_mode,psb_swap_persistent_) /= 0
swap_nonpersistent = iand(swap_mode,psb_swap_nonpersistent_) /= 0 swap_nonpersistent = iand(swap_mode,psb_swap_nonpersistent_) /= 0
halo_time = 0
do iter = 1, num_iterations do iter = 1, num_iterations
xa = iv xa = iv
xa(nrow+1:ncol) = -1 xa(nrow+1:ncol) = -1
call x_col%set_vect(xa) call x_col%set_vect(xa)
call MPI_Barrier(MPI_COMM_WORLD,ierr)
halo_t_start = MPI_Wtime()
call psb_halo(x_col,desc_a,info,mode=swap_mode) call psb_halo(x_col,desc_a,info,mode=swap_mode)
halo_t_end = MPI_Wtime() - halo_t_start
call MPI_Barrier(MPI_COMM_WORLD,ierr)
halo_time = halo_time + halo_t_end
xa = x_col%get_vect() xa = x_col%get_vect()
@ -313,6 +320,10 @@ program psb_df_sample
if (allocated(x_col%v)) then if (allocated(x_col%v)) then
if (allocated(x_col%v%p)) then if (allocated(x_col%v%p)) then
! -- collect times --- ! -- collect times ---
! halo time
call MPI_Reduce(halo_time, sum_halo_t, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
! total time ! total time
call MPI_Reduce(x_col%v%p%total_time, total_time, 1, & call MPI_Reduce(x_col%v%p%total_time, total_time, 1, &
MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr) MPI_DOUBLE_PRECISION, MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
@ -346,6 +357,7 @@ program psb_df_sample
call MPI_Reduce(ave_snd, ave_snd_buf, 1, MPI_REAL, & call MPI_Reduce(ave_snd, ave_snd_buf, 1, MPI_REAL, &
MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr) MPI_SUM, psb_root_, MPI_COMM_WORLD, ierr)
ave_snd_buf = ave_snd_buf / np ave_snd_buf = ave_snd_buf / np
print *, "snd_counts = ", x_col%v%p%snd_counts
call MPI_Reduce(minval(x_col%v%p%snd_counts), min_snd, 1, MPI_INTEGER, MPI_MIN, & call MPI_Reduce(minval(x_col%v%p%snd_counts), min_snd, 1, MPI_INTEGER, MPI_MIN, &
psb_root_, MPI_COMM_WORLD, ierr) psb_root_, MPI_COMM_WORLD, ierr)
call MPI_Reduce(maxval(x_col%v%p%snd_counts), max_snd, 1, MPI_INTEGER, MPI_MAX, & call MPI_Reduce(maxval(x_col%v%p%snd_counts), max_snd, 1, MPI_INTEGER, MPI_MAX, &
@ -375,14 +387,17 @@ program psb_df_sample
psb_root_, MPI_COMM_WORLD, ierr) psb_root_, MPI_COMM_WORLD, ierr)
if (me == psb_root_) then if (me == psb_root_) then
if (openfile .and. (np .eq. 2)) then inquire(file='halo_output.txt', exist=file_exists)
if (.not. file_exists) then
open(1,file='halo_output.txt') open(1,file='halo_output.txt')
write(1,'(A)',advance='no') "partition;" write(1,'(A)',advance='no') "partition;"
write(1,'(A)',advance='no') "np;" write(1,'(A)',advance='no') "np;"
write(1,'(A)',advance='no') "num_iterations;" write(1,'(A)',advance='no') "num_iterations;"
write(1,'(A)',advance='no') "swap_mode;" write(1,'(A)',advance='no') "swap_mode;"
write(1,'(A)',advance='no') "total_time;" write(1,'(A)',advance='no') "ave_halo_t_pi;"
write(1,'(A)',advance='no') "ave_time;" ! write(1,'(A)',advance='no') "total_time;"
! write(1,'(A)',advance='no') "ave_time_pp;"
write(1,'(A)',advance='no') "ave_time_pi;"
write(1,'(A)',advance='no') "alltoall_comm_t;" write(1,'(A)',advance='no') "alltoall_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_comm_t;" write(1,'(A)',advance='no') "ave_alltoall_comm_t;"
write(1,'(A)',advance='no') "ave_alltoall_median_comm_t;" write(1,'(A)',advance='no') "ave_alltoall_median_comm_t;"
@ -411,7 +426,10 @@ program psb_df_sample
! write(1,*) "" ! write(1,*) ""
! converting microseconds ! converting microseconds
total_time = total_time * 1000000 total_time = total_time * 1000000
ave_time = total_time / np ave_time_pp = total_time / np
ave_time_pi = total_time / (np * num_iterations)
sum_halo_t = sum_halo_t * 1000000
ave_halo_t_pi = sum_halo_t / (np * num_iterations)
alltoall_comm_t = alltoall_comm_t * 1000000 alltoall_comm_t = alltoall_comm_t * 1000000
ave_alltoall_comm_t = alltoall_comm_t / (np * num_iterations) ave_alltoall_comm_t = alltoall_comm_t / (np * num_iterations)
sum_median_comm_t = sum_median_comm_t * 1000000 sum_median_comm_t = sum_median_comm_t * 1000000
@ -436,15 +454,17 @@ program psb_df_sample
write(1,'(I5 A1)',advance='no') np, ';' write(1,'(I5 A1)',advance='no') np, ';'
write(1,'(I6 A1)',advance='no') num_iterations, ';' write(1,'(I6 A1)',advance='no') num_iterations, ';'
write(1,'(I5 A1)' ,advance='no') swap_mode, ';' write(1,'(I5 A1)' ,advance='no') swap_mode, ';'
write(1,'(F20.4 A1)',advance='no') total_time, ';' write(1,'(I5 A1)' ,advance='no') ave_halo_t_pi, ';'
write(1,'(F20.4 A1)',advance='no') ave_time, ';' ! write(1,'(F20.4 A1)',advance='no') total_time, ';'
! write(1,'(F20.4 A1)',advance='no') ave_time_pp, ';'
write(1,'(F20.4 A1)',advance='no') ave_time_pi, ';'
write(1,'(F20.4 A1)',advance='no') alltoall_comm_t, ';' write(1,'(F20.4 A1)',advance='no') alltoall_comm_t, ';'
write(1,'(F20.4 A1)',advance='no') ave_alltoall_comm_t, ';' write(1,'(F20.4 A1)',advance='no') ave_alltoall_comm_t, ';'
write(1,'(F20.4 A1)',advance='no') ave_alltoall_median_comm_t, ';' write(1,'(F20.4 A1)',advance='no') ave_alltoall_median_comm_t, ';'
write(1,'(F20.4 A1)',advance='no') ave_alltoall_max_comm_t, ';' write(1,'(F20.4 A1)',advance='no') ave_alltoall_max_comm_t, ';'
write(1,'(F20.4 A1)',advance='no') ave_alltoall_min_comm_t, ';' write(1,'(F20.4 A1)',advance='no') ave_alltoall_min_comm_t, ';'
write(1,'(F10.2 A1)',advance='no') request_create_t, ';' write(1,'(F10.2 A1)',advance='no') request_create_t, ';'
write(1,'(F10.2 A1)',advance='no') ave_request_create_t, ';' write(1,'(F10.2 A1)',advance='no') ave_request_create_t, ';'
write(1,'(F10.2 A1)',advance='no') ave_neighbors, ';' write(1,'(F10.2 A1)',advance='no') ave_neighbors, ';'
write(1,'(I5 A1)' ,advance='no') min_neighbors, ';' write(1,'(I5 A1)' ,advance='no') min_neighbors, ';'
@ -456,7 +476,7 @@ program psb_df_sample
write(1,'(I7 A1)',advance='no') max_snd, ';' write(1,'(I7 A1)',advance='no') max_snd, ';'
write(1,'(I7 A1)',advance='no') max_rcv, ';' write(1,'(I7 A1)',advance='no') max_rcv, ';'
write(1,'(I7 A1)',advance='no') min_snd, ';' write(1,'(I7 A1)',advance='no') min_snd, ';'
write(1,'(I7 A1)',advance='no') min_rcv, ';' write(1,'(I7 A1)',advance='no') min_rcv
! write(1,'(F9.2 A1)',advance='no') , ';' ! write(1,'(F9.2 A1)',advance='no') , ';'
! write(1,'(I5 A1)',advance='no') , ';' ! write(1,'(I5 A1)',advance='no') , ';'

@ -1,9 +1,9 @@
#!/bin/bash #!/bin/bash
matrix_dir=../matrices matrix_dir=../matrices
bin_dir=../bin bin_dir=../bin
# "cz148"
# "cz308"
declare -a matrices=( declare -a matrices=(
"cz148"
"cz308"
"cz628" "cz628"
"cz1268" "cz1268"
"cz2548" "cz2548"
@ -14,44 +14,72 @@ declare -a matrices=(
) )
declare -a iterations=( declare -a iterations=(
"10" "300" "600" "900" "1200" "1500" "1800" "2100" "2400" "2700" "3000" "1" "10" "25" "50" "100" "150" "200" "300" "600" "1000" "1500" "2000" "3000"
) )
# declare -a iterations=(
# "10" "100"
# )
declare -a numprocs=(
"2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" "16"
)
# ----------- BUILD -----------
# for iter in "${iterations[@]}"; do
# make clean
# make ITERATIONS=${iter}
# mv halo_persistant_${iter}_iter bin/
# done
declare -a swapchoices=(
"psb_swap_persistent_" "psb_swap_nonpersistent_"
)
# ------------ RUN ------------ # -- for testing --
# declare -a iterations=(
# "1" "10"
# )
declare -a numprocs=(
"16"
)
declare -a matrices=(
"cz20468"
"cz40948"
)
# -----------------
# ----------- BUILD -----------
build=true
if [ "$build" = true ]; then
for swap in "${swapchoices[@]}"; do
for iter in "${iterations[@]}"; do
make clean
make ITERATIONS=${iter} SWAPCHOICE=${swap}
mv halo_${swap}${iter}_iter bin/
done
done
fi
# go to directory to run # ------------ RUN ------------
run=false
if [ "$run" = true ]; then
for matrix in "${matrices[@]}"; do for matrix in "${matrices[@]}"; do
for iter in "${iterations[@]}"; do
# echo $matrix $iter
rm run/* rm run/*
cd run cd run
bin=halo_persistant_${iter}_iter for swap in "${swapchoices[@]}"; do
# matrix=cz308 for iter in "${iterations[@]}"; do
# echo $matrix $iter
bin=halo_${swap}${iter}_iter
cp ${matrix_dir}/${matrix}/* ./ cp ${matrix_dir}/${matrix}/* ./
cp ${bin_dir}/${bin} ./ cp ${bin_dir}/${bin} ./
mkdir ../results/halo-persistant/${matrix}_matrix for np in "${numprocs[@]}"; do
write_to=../results/halo-persistant/${matrix}_matrix/${iter}_iter_${matrix}_matrix
SCOREP_TIMER=gettimeofday \ SCOREP_TIMER=gettimeofday \
SCOREP_EXPERIMENT_DIRECTORY=${write_to} \ SCOREP_EXPERIMENT_DIRECTORY=../results/scorep/${matrix}${swap}${iter} \
SCOREP_WRAPPER_COMPILER_FLAGS="-O2" \ SCOREP_WRAPPER_COMPILER_FLAGS="-O2" \
SCOREP_MPI_ENABLE_GROUP=all \ SCOREP_MPI_ENABLE_GROUP=all \
mpirun --map-by ppr:16:node -machinefile $PBS_NODEFILE ./${bin} ${matrix}.inp mpirun --map-by ppr:${np}:node -machinefile $PBS_NODEFILE ./${bin} ${matrix}.inp
mv halo_persistant_output.txt ${write_to}
cd .. # mpirun -np ${np} ./${bin} ${matrix}.inp
done
done
done done
write_to=../results/${matrix}.output
mv halo_output.txt ${write_to}
cd ..
done done
fi

Loading…
Cancel
Save