new Makefile, test compile well + data vect

oacc_loloum
tloloum 2 years ago
parent a81d1d9b68
commit c7bbfb8b68

@ -1,7 +1,6 @@
TOPDIR=../.. TOPDIR=../..
include $(TOPDIR)/Make.inc include $(TOPDIR)/Make.inc
# Directories
LIBDIR=$(TOPDIR)/lib/ LIBDIR=$(TOPDIR)/lib/
PSBLIBDIR=$(TOPDIR)/lib/ PSBLIBDIR=$(TOPDIR)/lib/
PSBINCDIR=$(TOPDIR)/include PSBINCDIR=$(TOPDIR)/include
@ -10,36 +9,30 @@ INCDIR=$(TOPDIR)/include
MODDIR=$(TOPDIR)/modules MODDIR=$(TOPDIR)/modules
EXEDIR=./runs EXEDIR=./runs
# Libraries PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lopenblas -lmetis
PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis
LDLIBS=$(PSBGPULDLIBS) LDLIBS=$(PSBGPULDLIBS)
# Includes
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
# Compiler flags
FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70" FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
CFLAGS=-O0 -march=native CFLAGS=-O0 -march=native
# Source files SRCS=test.F90 vectoacc.F90 datavect.F90
SRCS=test.F90 vectoacc.F90
CSRC=timers.c CSRC=timers.c
# Object files
OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
# Default rule all: dir $(OBJS)
all: dir $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
/bin/mv datavect $(EXEDIR)
dir: dir:
@if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi
# Pattern rule for creating executables
%: %.o timers.o %: %.o timers.o
$(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) $(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
/bin/mv $@ $(EXEDIR) /bin/mv $@ $(EXEDIR)
# Compilation rules
%.o: %.F90 %.o: %.F90
$(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@ $(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@
@ -49,5 +42,4 @@ dir:
clean: clean:
/bin/rm -fr *.o *.mod $(EXEDIR)/* /bin/rm -fr *.o *.mod $(EXEDIR)/*
# Phony targets
.PHONY: all dir clean .PHONY: all dir clean

@ -0,0 +1,84 @@
program datavect
use psb_base_mod
use psb_oacc_mod
implicit none
type(psb_d_vect_oacc) :: v3, v4, v5
integer(psb_ipk_) :: info, n, i, old_percentage, percentage
real(psb_dpk_) :: alpha, dot_dev, dot_host, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
double precision, external :: etime
double precision :: time_start, time_end
integer, parameter :: min_size = 1000, max_size = 100000000, step_size = 1000000
integer, parameter :: ntests = 80, ngpu = 20
integer :: size
character(len=20) :: filename
open(unit=10, file='performance_data.csv', status='unknown')
write(10, '(A, A, A, A, A)') 'Size,Alloc_Host,Alloc_Dev,Calc_Host,Calc_Dev'
write(*, *) 'Test of the vector operations with OpenACC'
alpha = 2.0
old_percentage = 0
do size = min_size, max_size, step_size
n = size
percentage = int(real(size - min_size) / real(max_size - min_size) * 100.0)
if (percentage /= old_percentage) then
write(*, '(A,I3,A)', advance='no') 'Progress: ', percentage, '%'
write(*,'(A)', advance='no') char(13)
old_percentage = percentage
end if
time_start = etime()
call v3%all(n, info)
call v4%all(n, info)
call v5%all(n, info)
time_end = etime()
t_alloc_host = (time_end - time_start)
do i = 1, n
v3%v(i) = real(i, psb_dpk_)
v4%v(i) = real(n - i, psb_dpk_)
end do
call v3%scal(alpha)
call v3%set_host()
call v4%set_host()
time_start = etime()
do i = 1, ntests
dot_host = sum(v3%v * v4%v)
end do
time_end = etime()
t_calc_host = (time_end - time_start) / real(ntests)
time_start = etime()
call v3%set_dev()
call v4%set_dev()
call v3%sync_space()
call v4%sync_space()
time_end = etime()
t_alloc_dev = (time_end - time_start)
time_start = etime()
do i = 1, ntests
dot_dev = v3%dot_v(n, v4)
end do
!$acc wait
time_end = etime()
t_calc_dev = (time_end - time_start) / real(ntests)
write(10, '(I10, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5)') size, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
call v3%free(info)
call v4%free(info)
call v5%free(info)
end do
close(10)
write(*, *) 'Performance data written to performance_data.csv'
end program datavect

@ -1,85 +1,99 @@
program vectoacc program vectoacc
use psb_base_mod use psb_base_mod
use psb_oacc_mod use psb_oacc_mod
implicit none implicit none
type(psb_d_vect_oacc) :: v3, v4, v5 type(psb_d_vect_oacc) :: v3, v4, v5
integer(psb_ipk_) :: info, n, i integer(psb_ipk_) :: info, n, i
real(psb_dpk_) :: alpha, beta, result real(psb_dpk_) :: alpha, beta, result
double precision, external :: etime double precision, external :: etime
real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
double precision :: time_start, time_end double precision :: time_start, time_end
integer(psb_ipk_), parameter :: ntests=80, ngpu=20 integer(psb_ipk_), parameter :: ntests=80, ngpu=20
write(*, *) 'Test of the vector operations with OpenACC' write(*, *) 'Test of the vector operations with OpenACC'
write(*, *) 'Enter the size of the vectors' write(*, *) 'Enter the size of the vectors'
read(*, *) n read(*, *) n
alpha = 2.0 alpha = 2.0
beta = 0.5 beta = 0.5
call v3%all(n, info) time_start = etime()
call v4%all(n, info) call v3%all(n, info)
call v5%all(n, info) call v4%all(n, info)
call v5%all(n, info)
do i = 1, n time_end = etime()
v3%v(i) = real(i, psb_dpk_) t_alloc_host = time_end - time_start
v4%v(i) = real(n - i, psb_dpk_) write(*, *) 'Allocation time on host: ', t_alloc_host, ' sec'
end do
do i = 1, n
call v3%set_dev() v3%v(i) = real(i, psb_dpk_)
call v4%set_dev() v4%v(i) = real(n - i, psb_dpk_)
end do
call v3%scal(alpha)
call v3%sync() call v3%set_dev()
call v4%set_dev()
do i = 1, n
if (v3%v(i) /= alpha * real(i, psb_dpk_)) then call v3%scal(alpha)
write(*, *) 'Scal error : index', i call v3%sync()
end if
end do do i = 1, n
write(*, *) 'Scal test passed' if (v3%v(i) /= alpha * real(i, psb_dpk_)) then
write(*, *) 'Scal error : index', i
result = v3%dot_v(n, v4) end if
call v3%sync() end do
call v4%sync() write(*, *) 'Scal test passed'
if (result /= sum(v3%v * v4%v)) then
write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result result = v3%dot_v(n, v4)
end if call v3%sync()
write(*, *) 'Dot_v test passed' call v4%sync()
if (result /= sum(v3%v * v4%v)) then
result = v3%nrm2(n) write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result
call v3%sync() end if
if (result /= sqrt(sum(v3%v ** 2))) then write(*, *) 'Dot_v test passed'
write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
end if result = v3%nrm2(n)
write(*, *) 'nrm2 test passed' call v3%sync()
if (result /= sqrt(sum(v3%v ** 2))) then
call v3%set_host() write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
call v4%set_host() end if
write(*, *) 'nrm2 test passed'
time_start = etime()
do i = 1, ntests call v3%set_host()
dot_host = sum(v3%v * v4%v) call v4%set_host()
end do
time_end = etime() time_start = etime()
t_host = (time_end - time_start) / real(ntests) do i = 1, ntests
write(*, *) 'Performance host: ', t_host, ' sec' dot_host = sum(v3%v * v4%v)
end do
call v3%set_dev() time_end = etime()
call v4%set_dev() t_calc_host = (time_end - time_start) / real(ntests)
time_start = etime() write(*, *) 'Host calculation time: ', t_calc_host, ' sec'
do i = 1, ntests
dot_dev = v3%dot_v(n, v4) call v3%set_dev()
end do call v4%set_dev()
!$acc wait
time_end = etime() time_start = etime()
t_dev = (time_end - time_start) / real(ntests) call v3%sync_space()
write(*, *) 'Performance device: ', t_dev, ' sec' call v4%sync_space()
time_end = etime()
call v3%free(info) t_alloc_dev = time_end - time_start
call v4%free(info) write(*, *) 'Allocation time on device: ', t_alloc_dev, ' sec'
call v5%free(info)
time_start = etime()
do i = 1, ntests
dot_dev = v3%dot_v(n, v4)
end do
!$acc wait
time_end = etime()
t_calc_dev = (time_end - time_start) / real(ntests)
write(*, *) 'Device calculation time: ', t_calc_dev, ' sec'
call v3%free(info)
call v4%free(info)
call v5%free(info)
end program vectoacc end program vectoacc
Loading…
Cancel
Save