oacc : d_vect + i_vect + d_csr
parent
39cfcd3893
commit
2b8671fba6
@ -0,0 +1,48 @@
|
||||
.SUFFIXES:
|
||||
.SUFFIXES: .F90 .f90 .o .s .c
|
||||
|
||||
# Compilers and flags
|
||||
CC=mpicc
|
||||
FC=mpif90
|
||||
FCOPT=-O0 -march=native
|
||||
OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
|
||||
|
||||
# Directories
|
||||
LIBDIR=../lib
|
||||
INCDIR=../include
|
||||
MODDIR=../modules
|
||||
IMPLDIR=./impl # Adding the impl directory
|
||||
|
||||
# Include and library paths
|
||||
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR)
|
||||
LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
|
||||
|
||||
# Source files
|
||||
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
|
||||
psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
|
||||
impl/psb_d_oacc_csr_vect_mv.o
|
||||
|
||||
# Library name
|
||||
LIBNAME=libpsb_openacc.a
|
||||
|
||||
# Rules
|
||||
all: $(LIBNAME)
|
||||
|
||||
$(LIBNAME): $(FOBJS)
|
||||
ar cr $(LIBNAME) $(FOBJS)
|
||||
/bin/cp -p $(LIBNAME) $(LIBDIR)
|
||||
|
||||
clean:
|
||||
/bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o
|
||||
|
||||
.f90.o:
|
||||
$(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $< -o $@
|
||||
|
||||
.F90.o:
|
||||
$(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
|
||||
|
||||
.F90.s:
|
||||
$(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@
|
@ -0,0 +1,61 @@
|
||||
subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
use psb_base_mod
|
||||
use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
|
||||
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
|
||||
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
if (x%is_host()) call x%sync()
|
||||
if (y%is_host()) call y%sync()
|
||||
|
||||
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), irp(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
real(psb_dpk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = irp(i), irp(i + 1) - 1
|
||||
tmp = tmp + val(j) * x(ja(j))
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_d_oacc_csr_vect_mv
|
@ -0,0 +1,185 @@
|
||||
module psb_d_oacc_csr_mat_mod
|
||||
|
||||
use iso_c_binding
|
||||
use psb_d_mat_mod
|
||||
use psb_d_oacc_vect_mod
|
||||
use oaccsparse_mod
|
||||
|
||||
integer(psb_ipk_), parameter, private :: is_host = -1
|
||||
integer(psb_ipk_), parameter, private :: is_sync = 0
|
||||
integer(psb_ipk_), parameter, private :: is_dev = 1
|
||||
|
||||
type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
|
||||
integer(psb_ipk_) :: devstate = is_host
|
||||
contains
|
||||
procedure, pass(a) :: all => d_oacc_csr_all
|
||||
procedure, pass(a) :: is_host => d_oacc_csr_is_host
|
||||
procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
|
||||
procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
|
||||
procedure, pass(a) :: set_host => d_oacc_csr_set_host
|
||||
procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
|
||||
procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
|
||||
procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
|
||||
procedure, pass(a) :: sync => d_oacc_csr_sync
|
||||
procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
|
||||
end type psb_d_oacc_csr_sparse_mat
|
||||
|
||||
interface
|
||||
subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
end subroutine psb_d_oacc_csr_vect_mv
|
||||
end interface
|
||||
|
||||
contains
|
||||
|
||||
subroutine d_oacc_csr_all(m, n, nz, a, info)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n, nz
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = 0
|
||||
if (allocated(a%val)) then
|
||||
!$acc exit data delete(a%val) finalize
|
||||
deallocate(a%val, stat=info)
|
||||
end if
|
||||
if (allocated(a%ja)) then
|
||||
!$acc exit data delete(a%ja) finalize
|
||||
deallocate(a%ja, stat=info)
|
||||
end if
|
||||
if (allocated(a%irp)) then
|
||||
!$acc exit data delete(a%irp) finalize
|
||||
deallocate(a%irp, stat=info)
|
||||
end if
|
||||
|
||||
call a%set_nrows(m)
|
||||
call a%set_ncols(n)
|
||||
|
||||
allocate(a%val(nz),stat=info)
|
||||
allocate(a%ja(nz),stat=info)
|
||||
allocate(a%irp(m+1),stat=info)
|
||||
if (info == 0) call a%set_host()
|
||||
if (info == 0) call a%sync_space()
|
||||
end subroutine d_oacc_csr_all
|
||||
|
||||
function d_oacc_csr_is_host(a) result(res)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
logical :: res
|
||||
|
||||
res = (a%devstate == is_host)
|
||||
end function d_oacc_csr_is_host
|
||||
|
||||
function d_oacc_csr_is_sync(a) result(res)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
logical :: res
|
||||
|
||||
res = (a%devstate == is_sync)
|
||||
end function d_oacc_csr_is_sync
|
||||
|
||||
function d_oacc_csr_is_dev(a) result(res)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
logical :: res
|
||||
|
||||
res = (a%devstate == is_dev)
|
||||
end function d_oacc_csr_is_dev
|
||||
|
||||
subroutine d_oacc_csr_set_host(a)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
|
||||
a%devstate = is_host
|
||||
end subroutine d_oacc_csr_set_host
|
||||
|
||||
subroutine d_oacc_csr_set_sync(a)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
|
||||
a%devstate = is_sync
|
||||
end subroutine d_oacc_csr_set_sync
|
||||
|
||||
subroutine d_oacc_csr_set_dev(a)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
|
||||
a%devstate = is_dev
|
||||
end subroutine d_oacc_csr_set_dev
|
||||
|
||||
subroutine d_oacc_csr_sync_space(a)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
if (allocated(a%val)) then
|
||||
call d_oacc_create_dev(a%val)
|
||||
end if
|
||||
if (allocated(a%ja)) then
|
||||
call i_oacc_create_dev(a%ja)
|
||||
end if
|
||||
if (allocated(a%irp)) then
|
||||
call i_oacc_create_dev(a%irp)
|
||||
end if
|
||||
contains
|
||||
subroutine d_oacc_create_dev(v)
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: v(:)
|
||||
!$acc enter data copyin(v)
|
||||
end subroutine d_oacc_create_dev
|
||||
subroutine i_oacc_create_dev(v)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: v(:)
|
||||
!$acc enter data copyin(v)
|
||||
end subroutine i_oacc_create_dev
|
||||
end subroutine d_oacc_csr_sync_space
|
||||
|
||||
subroutine d_oacc_csr_sync(a)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a
|
||||
class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
tmpa => a
|
||||
if (a%is_dev()) then
|
||||
call d_oacc_csr_to_host(a%val)
|
||||
call i_oacc_csr_to_host(a%ja)
|
||||
call i_oacc_csr_to_host(a%irp)
|
||||
else if (a%is_host()) then
|
||||
call d_oacc_csr_to_dev(a%val)
|
||||
call i_oacc_csr_to_dev(a%ja)
|
||||
call i_oacc_csr_to_dev(a%irp)
|
||||
end if
|
||||
call tmpa%set_sync()
|
||||
end subroutine d_oacc_csr_sync
|
||||
|
||||
subroutine d_oacc_csr_to_dev(v)
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: v(:)
|
||||
!$acc update device(v)
|
||||
end subroutine d_oacc_csr_to_dev
|
||||
|
||||
subroutine d_oacc_csr_to_host(v)
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: v(:)
|
||||
!$acc update self(v)
|
||||
end subroutine d_oacc_csr_to_host
|
||||
|
||||
subroutine i_oacc_csr_to_dev(v)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: v(:)
|
||||
!$acc update device(v)
|
||||
end subroutine i_oacc_csr_to_dev
|
||||
|
||||
subroutine i_oacc_csr_to_host(v)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: v(:)
|
||||
!$acc update self(v)
|
||||
end subroutine i_oacc_csr_to_host
|
||||
|
||||
|
||||
end module psb_d_oacc_csr_mat_mod
|
||||
|
@ -0,0 +1,966 @@
|
||||
module psb_d_oacc_vect_mod
|
||||
use iso_c_binding
|
||||
use psb_const_mod
|
||||
use psb_error_mod
|
||||
use psb_d_vect_mod
|
||||
use psb_i_oacc_vect_mod
|
||||
use psb_i_vect_mod
|
||||
|
||||
integer(psb_ipk_), parameter, private :: is_host = -1
|
||||
integer(psb_ipk_), parameter, private :: is_sync = 0
|
||||
integer(psb_ipk_), parameter, private :: is_dev = 1
|
||||
|
||||
type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc
|
||||
integer :: state = is_host
|
||||
|
||||
contains
|
||||
procedure, pass(x) :: get_nrows => d_oacc_get_nrows
|
||||
procedure, nopass :: get_fmt => d_oacc_get_fmt
|
||||
|
||||
procedure, pass(x) :: all => d_oacc_vect_all
|
||||
procedure, pass(x) :: zero => d_oacc_zero
|
||||
procedure, pass(x) :: asb_m => d_oacc_asb_m
|
||||
procedure, pass(x) :: sync => d_oacc_sync
|
||||
procedure, pass(x) :: sync_space => d_oacc_sync_space
|
||||
procedure, pass(x) :: bld_x => d_oacc_bld_x
|
||||
procedure, pass(x) :: bld_mn => d_oacc_bld_mn
|
||||
procedure, pass(x) :: free => d_oacc_vect_free
|
||||
procedure, pass(x) :: ins_a => d_oacc_ins_a
|
||||
procedure, pass(x) :: ins_v => d_oacc_ins_v
|
||||
procedure, pass(x) :: is_host => d_oacc_is_host
|
||||
procedure, pass(x) :: is_dev => d_oacc_is_dev
|
||||
procedure, pass(x) :: is_sync => d_oacc_is_sync
|
||||
procedure, pass(x) :: set_host => d_oacc_set_host
|
||||
procedure, pass(x) :: set_dev => d_oacc_set_dev
|
||||
procedure, pass(x) :: set_sync => d_oacc_set_sync
|
||||
procedure, pass(x) :: set_scal => d_oacc_set_scal
|
||||
|
||||
procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
|
||||
procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
|
||||
procedure, pass(y) :: sctb => d_oacc_sctb
|
||||
procedure, pass(y) :: sctb_x => d_oacc_sctb_x
|
||||
procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
|
||||
|
||||
procedure, pass(x) :: get_size => oacc_get_size
|
||||
procedure, pass(x) :: dot_v => d_oacc_vect_dot
|
||||
procedure, pass(x) :: dot_a => d_oacc_dot_a
|
||||
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
|
||||
procedure, pass(y) :: axpby_a => d_oacc_axpby_a
|
||||
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
|
||||
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
|
||||
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
|
||||
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
|
||||
procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
|
||||
procedure, pass(x) :: scal => d_oacc_scal
|
||||
procedure, pass(x) :: nrm2 => d_oacc_nrm2
|
||||
procedure, pass(x) :: amax => d_oacc_amax
|
||||
procedure, pass(x) :: asum => d_oacc_asum
|
||||
procedure, pass(x) :: absval1 => d_oacc_absval1
|
||||
procedure, pass(x) :: absval2 => d_oacc_absval2
|
||||
|
||||
end type psb_d_vect_oacc
|
||||
|
||||
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
|
||||
|
||||
contains
|
||||
|
||||
subroutine d_oacc_absval1(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: n, i
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
n = size(x%v)
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
x%v(i) = abs(x%v(i))
|
||||
end do
|
||||
call x%set_dev()
|
||||
end subroutine d_oacc_absval1
|
||||
|
||||
subroutine d_oacc_absval2(x, y)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
class(psb_d_base_vect_type), intent(inout) :: y
|
||||
integer(psb_ipk_) :: n
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
n = min(size(x%v), size(y%v))
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (x%is_host()) call x%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
yy%v(i) = abs(x%v(i))
|
||||
end do
|
||||
class default
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call x%psb_d_base_vect_type%absval(y)
|
||||
end select
|
||||
end subroutine d_oacc_absval2
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_scal(alpha, x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
real(psb_dpk_), intent(in) :: alpha
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, size(x%v)
|
||||
x%v(i) = alpha * x%v(i)
|
||||
end do
|
||||
call x%set_dev()
|
||||
end subroutine d_oacc_scal
|
||||
|
||||
function d_oacc_nrm2(n, x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_) :: res
|
||||
integer(psb_ipk_) :: info
|
||||
real(psb_dpk_) :: sum
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
sum = 0.0
|
||||
!$acc parallel loop reduction(+:sum)
|
||||
do i = 1, n
|
||||
sum = sum + x%v(i) * x%v(i)
|
||||
end do
|
||||
res = sqrt(sum)
|
||||
end function d_oacc_nrm2
|
||||
|
||||
function d_oacc_amax(n, x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_) :: res
|
||||
integer(psb_ipk_) :: info
|
||||
real(psb_dpk_) :: max_val
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
max_val = -huge(0.0)
|
||||
!$acc parallel loop reduction(max:max_val)
|
||||
do i = 1, n
|
||||
if (x%v(i) > max_val) max_val = x%v(i)
|
||||
end do
|
||||
res = max_val
|
||||
end function d_oacc_amax
|
||||
|
||||
function d_oacc_asum(n, x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_) :: res
|
||||
integer(psb_ipk_) :: info
|
||||
real(psb_dpk_) :: sum
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
sum = 0.0
|
||||
!$acc parallel loop reduction(+:sum)
|
||||
do i = 1, n
|
||||
sum = sum + abs(x%v(i))
|
||||
end do
|
||||
res = sum
|
||||
end function d_oacc_asum
|
||||
|
||||
|
||||
subroutine d_oacc_mlt_v(x, y, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_vect_oacc), intent(inout) :: y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i, n
|
||||
|
||||
info = 0
|
||||
n = min(x%get_nrows(), y%get_nrows())
|
||||
select type(xx => x)
|
||||
type is (psb_d_base_vect_type)
|
||||
if (y%is_dev()) call y%sync()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(i) = y%v(i) * xx%v(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
class default
|
||||
if (xx%is_dev()) call xx%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(i) = y%v(i) * xx%v(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
end select
|
||||
end subroutine d_oacc_mlt_v
|
||||
|
||||
subroutine d_oacc_mlt_a(x, y, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: x(:)
|
||||
class(psb_d_vect_oacc), intent(inout) :: y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, n
|
||||
|
||||
info = 0
|
||||
if (y%is_dev()) call y%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, size(x)
|
||||
y%v(i) = y%v(i) * x(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
end subroutine d_oacc_mlt_a
|
||||
|
||||
subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_), intent(in) :: x(:)
|
||||
real(psb_dpk_), intent(in) :: y(:)
|
||||
class(psb_d_vect_oacc), intent(inout) :: z
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, n
|
||||
|
||||
info = 0
|
||||
if (z%is_dev()) call z%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, size(x)
|
||||
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
|
||||
end do
|
||||
call z%set_host()
|
||||
end subroutine d_oacc_mlt_a_2
|
||||
|
||||
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
|
||||
use psi_serial_mod
|
||||
use psb_string_mod
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_base_vect_type), intent(inout) :: y
|
||||
class(psb_d_vect_oacc), intent(inout) :: z
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character(len=1), intent(in), optional :: conjgx, conjgy
|
||||
integer(psb_ipk_) :: i, n
|
||||
logical :: conjgx_, conjgy_
|
||||
|
||||
conjgx_ = .false.
|
||||
conjgy_ = .false.
|
||||
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
|
||||
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
|
||||
|
||||
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
|
||||
|
||||
info = 0
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync_space()
|
||||
if (yy%is_host()) call yy%sync_space()
|
||||
if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
|
||||
end do
|
||||
call z%set_dev()
|
||||
class default
|
||||
if (xx%is_dev()) call xx%sync_space()
|
||||
if (yy%is_dev()) call yy%sync()
|
||||
if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
|
||||
end do
|
||||
call z%set_host()
|
||||
end select
|
||||
class default
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
|
||||
end do
|
||||
call z%set_host()
|
||||
end select
|
||||
end subroutine d_oacc_mlt_v_2
|
||||
|
||||
|
||||
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_vect_oacc), intent(inout) :: y
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: nx, ny, i
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(xx => x)
|
||||
type is (psb_d_vect_oacc)
|
||||
if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
|
||||
if (xx%is_host()) call xx%sync_space()
|
||||
nx = size(xx%v)
|
||||
ny = size(y%v)
|
||||
if ((nx < m) .or. (ny < m)) then
|
||||
info = psb_err_internal_error_
|
||||
else
|
||||
!$acc parallel loop
|
||||
do i = 1, m
|
||||
y%v(i) = alpha * xx%v(i) + beta * y%v(i)
|
||||
end do
|
||||
end if
|
||||
call y%set_dev()
|
||||
class default
|
||||
if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
|
||||
call y%axpby(m, alpha, x%v, beta, info)
|
||||
end select
|
||||
end subroutine d_oacc_axpby_v
|
||||
|
||||
subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m
|
||||
real(psb_dpk_), intent(in) :: x(:)
|
||||
class(psb_d_vect_oacc), intent(inout) :: y
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, m
|
||||
y%v(i) = alpha * x(i) + beta * y%v(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
end subroutine d_oacc_axpby_a
|
||||
|
||||
subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_base_vect_type), intent(inout) :: y
|
||||
class(psb_d_vect_oacc), intent(inout) :: z
|
||||
real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: nx, ny, nz, i
|
||||
logical :: gpu_done
|
||||
|
||||
info = psb_success_
|
||||
gpu_done = .false.
|
||||
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type(yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type(zz => z)
|
||||
class is (psb_d_vect_oacc)
|
||||
if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
|
||||
if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
|
||||
if (xx%is_host()) call xx%sync_space()
|
||||
nx = size(xx%v)
|
||||
ny = size(yy%v)
|
||||
nz = size(zz%v)
|
||||
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
|
||||
info = psb_err_internal_error_
|
||||
else
|
||||
!$acc parallel loop
|
||||
do i = 1, m
|
||||
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
|
||||
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
|
||||
end do
|
||||
end if
|
||||
call yy%set_dev()
|
||||
call zz%set_dev()
|
||||
gpu_done = .true.
|
||||
end select
|
||||
end select
|
||||
end select
|
||||
|
||||
if (.not. gpu_done) then
|
||||
if (x%is_host()) call x%sync()
|
||||
if (y%is_host()) call y%sync()
|
||||
if (z%is_host()) call z%sync()
|
||||
call y%axpby(m, alpha, x, beta, info)
|
||||
call z%axpby(m, gamma, y, delta, info)
|
||||
end if
|
||||
end subroutine d_oacc_abgdxyz
|
||||
|
||||
|
||||
subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
real(psb_dpk_) :: beta
|
||||
class(psb_d_vect_oacc) :: y
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
if (.not.allocated(y%combuf)) then
|
||||
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
|
||||
return
|
||||
end if
|
||||
|
||||
select type(ii => idx)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (ii%is_host()) call ii%sync_space(info)
|
||||
if (y%is_host()) call y%sync_space()
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
|
||||
end do
|
||||
|
||||
class default
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
|
||||
end do
|
||||
end select
|
||||
end subroutine d_oacc_sctb_buf
|
||||
|
||||
subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_ipk_):: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
real(psb_dpk_) :: beta, x(:)
|
||||
class(psb_d_vect_oacc) :: y
|
||||
integer(psb_ipk_) :: info, ni
|
||||
|
||||
select type(ii => idx)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (ii%is_host()) call ii%sync_space(info)
|
||||
class default
|
||||
call psb_errpush(info, 'd_oacc_sctb_x')
|
||||
return
|
||||
end select
|
||||
|
||||
if (y%is_host()) call y%sync_space()
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
|
||||
end do
|
||||
|
||||
call y%set_dev()
|
||||
end subroutine d_oacc_sctb_x
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_sctb(n, idx, x, beta, y)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_ipk_) :: n
|
||||
integer(psb_ipk_) :: idx(:)
|
||||
real(psb_dpk_) :: beta, x(:)
|
||||
class(psb_d_vect_oacc) :: y
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
if (n == 0) return
|
||||
if (y%is_dev()) call y%sync_space()
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
|
||||
end do
|
||||
|
||||
call y%set_host()
|
||||
end subroutine d_oacc_sctb
|
||||
|
||||
|
||||
subroutine d_oacc_gthzbuf(i, n, idx, x)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
class(psb_d_vect_oacc) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
info = 0
|
||||
if (.not.allocated(x%combuf)) then
|
||||
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
|
||||
return
|
||||
end if
|
||||
|
||||
select type(ii => idx)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (ii%is_host()) call ii%sync_space(info)
|
||||
class default
|
||||
call psb_errpush(info, 'd_oacc_gthzbuf')
|
||||
return
|
||||
end select
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
x%combuf(i) = x%v(idx%v(i))
|
||||
end do
|
||||
end subroutine d_oacc_gthzbuf
|
||||
|
||||
subroutine d_oacc_gthzv_x(i, n, idx, x, y)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type):: idx
|
||||
real(psb_dpk_) :: y(:)
|
||||
class(psb_d_vect_oacc):: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
info = 0
|
||||
|
||||
select type(ii => idx)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (ii%is_host()) call ii%sync_space(info)
|
||||
class default
|
||||
call psb_errpush(info, 'd_oacc_gthzv_x')
|
||||
return
|
||||
end select
|
||||
|
||||
if (x%is_host()) call x%sync_space()
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y(i) = x%v(idx%v(i))
|
||||
end do
|
||||
end subroutine d_oacc_gthzv_x
|
||||
|
||||
subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n, dupl
|
||||
class(psb_i_base_vect_type), intent(inout) :: irl
|
||||
class(psb_d_base_vect_type), intent(inout) :: val
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i, isz
|
||||
logical :: done_oacc
|
||||
|
||||
info = 0
|
||||
if (psb_errstatus_fatal()) return
|
||||
|
||||
done_oacc = .false.
|
||||
select type(virl => irl)
|
||||
type is (psb_i_vect_oacc)
|
||||
select type(vval => val)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (vval%is_host()) call vval%sync_space()
|
||||
if (virl%is_host()) call virl%sync_space(info)
|
||||
if (x%is_host()) call x%sync_space()
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
x%v(virl%v(i)) = vval%v(i)
|
||||
end do
|
||||
call x%set_dev()
|
||||
done_oacc = .true.
|
||||
end select
|
||||
end select
|
||||
|
||||
if (.not.done_oacc) then
|
||||
select type(virl => irl)
|
||||
type is (psb_i_vect_oacc)
|
||||
if (virl%is_dev()) call virl%sync_space(info)
|
||||
end select
|
||||
select type(vval => val)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (vval%is_dev()) call vval%sync_space()
|
||||
end select
|
||||
call x%ins(n, irl%v, val%v, dupl, info)
|
||||
end if
|
||||
|
||||
if (info /= 0) then
|
||||
call psb_errpush(info, 'oacc_vect_ins')
|
||||
return
|
||||
end if
|
||||
|
||||
end subroutine d_oacc_ins_v
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n, dupl
|
||||
integer(psb_ipk_), intent(in) :: irl(:)
|
||||
real(psb_dpk_), intent(in) :: val(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
info = 0
|
||||
if (x%is_dev()) call x%sync_space()
|
||||
call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
|
||||
call x%set_host()
|
||||
!$acc update device(x%v)
|
||||
|
||||
end subroutine d_oacc_ins_a
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_bld_mn(x, n)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_mpk_), intent(in) :: n
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
call x%all(n, info)
|
||||
if (info /= 0) then
|
||||
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
|
||||
end if
|
||||
call x%set_host()
|
||||
!$acc update device(x%v)
|
||||
|
||||
end subroutine d_oacc_bld_mn
|
||||
|
||||
|
||||
subroutine d_oacc_bld_x(x, this)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: this(:)
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
call psb_realloc(size(this), x%v, info)
|
||||
if (info /= 0) then
|
||||
info = psb_err_alloc_request_
|
||||
call psb_errpush(info, 'd_oacc_bld_x', &
|
||||
i_err=(/size(this), izero, izero, izero, izero/))
|
||||
return
|
||||
end if
|
||||
|
||||
x%v(:) = this(:)
|
||||
call x%set_host()
|
||||
!$acc update device(x%v)
|
||||
|
||||
end subroutine d_oacc_bld_x
|
||||
|
||||
|
||||
subroutine d_oacc_asb_m(n, x, info)
|
||||
use psb_base_mod
|
||||
implicit none
|
||||
integer(psb_mpk_), intent(in) :: n
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_mpk_) :: nd
|
||||
|
||||
info = psb_success_
|
||||
|
||||
if (x%is_dev()) then
|
||||
nd = size(x%v)
|
||||
if (nd < n) then
|
||||
call x%sync()
|
||||
call x%psb_d_base_vect_type%asb(n, info)
|
||||
if (info == psb_success_) call x%sync_space()
|
||||
call x%set_host()
|
||||
end if
|
||||
else
|
||||
if (size(x%v) < n) then
|
||||
call x%psb_d_base_vect_type%asb(n, info)
|
||||
if (info == psb_success_) call x%sync_space()
|
||||
call x%set_host()
|
||||
end if
|
||||
end if
|
||||
end subroutine d_oacc_asb_m
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_set_scal(x, val, first, last)
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
real(psb_dpk_), intent(in) :: val
|
||||
integer(psb_ipk_), optional :: first, last
|
||||
|
||||
integer(psb_ipk_) :: first_, last_
|
||||
first_ = 1
|
||||
last_ = x%get_nrows()
|
||||
if (present(first)) first_ = max(1, first)
|
||||
if (present(last)) last_ = min(last, last_)
|
||||
|
||||
!$acc parallel loop
|
||||
do i = first_, last_
|
||||
x%v(i) = val
|
||||
end do
|
||||
!$acc end parallel loop
|
||||
|
||||
call x%set_dev()
|
||||
end subroutine d_oacc_set_scal
|
||||
|
||||
|
||||
|
||||
subroutine d_oacc_zero(x)
|
||||
use psi_serial_mod
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
call x%set_dev()
|
||||
call x%set_scal(dzero)
|
||||
end subroutine d_oacc_zero
|
||||
|
||||
function d_oacc_get_nrows(x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(in) :: x
|
||||
integer(psb_ipk_) :: res
|
||||
|
||||
if (allocated(x%v)) res = size(x%v)
|
||||
end function d_oacc_get_nrows
|
||||
|
||||
function d_oacc_get_fmt() result(res)
|
||||
implicit none
|
||||
character(len=5) :: res
|
||||
res = "dOACC"
|
||||
|
||||
end function d_oacc_get_fmt
|
||||
|
||||
function d_oacc_vect_dot(n, x, y) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
class(psb_d_base_vect_type), intent(inout) :: y
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_) :: res
|
||||
real(psb_dpk_), external :: ddot
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
res = dzero
|
||||
|
||||
select type(yy => y)
|
||||
type is (psb_d_base_vect_type)
|
||||
if (x%is_dev()) call x%sync()
|
||||
res = ddot(n, x%v, 1, yy%v, 1)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (x%is_host()) call x%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
|
||||
!$acc parallel loop reduction(+:res) present(x%v, yy%v)
|
||||
do i = 1, n
|
||||
res = res + x%v(i) * yy%v(i)
|
||||
end do
|
||||
!$acc end parallel loop
|
||||
|
||||
class default
|
||||
call x%sync()
|
||||
res = y%dot(n, x%v)
|
||||
end select
|
||||
|
||||
end function d_oacc_vect_dot
|
||||
|
||||
|
||||
|
||||
|
||||
function d_oacc_dot_a(n, x, y) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
real(psb_dpk_), intent(in) :: y(:)
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_) :: res
|
||||
real(psb_dpk_), external :: ddot
|
||||
|
||||
if (x%is_dev()) call x%sync()
|
||||
res = ddot(n, y, 1, x%v, 1)
|
||||
|
||||
end function d_oacc_dot_a
|
||||
|
||||
! subroutine d_oacc_set_vect(x,y)
|
||||
! implicit none
|
||||
! class(psb_d_vect_oacc), intent(inout) :: x
|
||||
! real(psb_dpk_), intent(in) :: y(:)
|
||||
! integer(psb_ipk_) :: info
|
||||
|
||||
! if (size(x%v) /= size(y)) then
|
||||
! call x%free(info)
|
||||
! call x%all(size(y),info)
|
||||
! end if
|
||||
! x%v(:) = y(:)
|
||||
! call x%set_host()
|
||||
! end subroutine d_oacc_set_vect
|
||||
|
||||
subroutine d_oacc_to_dev(v)
|
||||
implicit none
|
||||
real(psb_dpk_) :: v(:)
|
||||
!$acc update device(v)
|
||||
end subroutine d_oacc_to_dev
|
||||
|
||||
subroutine d_oacc_to_host(v)
|
||||
implicit none
|
||||
real(psb_dpk_) :: v(:)
|
||||
!$acc update self(v)
|
||||
end subroutine d_oacc_to_host
|
||||
|
||||
subroutine d_oacc_sync_space(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
if (allocated(x%v)) then
|
||||
call d_oacc_create_dev(x%v)
|
||||
end if
|
||||
contains
|
||||
subroutine d_oacc_create_dev(v)
|
||||
implicit none
|
||||
real(psb_dpk_) :: v(:)
|
||||
!$acc enter data copyin(v)
|
||||
end subroutine d_oacc_create_dev
|
||||
end subroutine d_oacc_sync_space
|
||||
|
||||
subroutine d_oacc_sync(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
if (x%is_dev()) then
|
||||
call d_oacc_to_host(x%v)
|
||||
end if
|
||||
if (x%is_host()) then
|
||||
call d_oacc_to_dev(x%v)
|
||||
end if
|
||||
call x%set_sync()
|
||||
end subroutine d_oacc_sync
|
||||
|
||||
subroutine d_oacc_set_host(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
|
||||
x%state = is_host
|
||||
end subroutine d_oacc_set_host
|
||||
|
||||
subroutine d_oacc_set_dev(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
|
||||
x%state = is_dev
|
||||
end subroutine d_oacc_set_dev
|
||||
|
||||
subroutine d_oacc_set_sync(x)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
|
||||
x%state = is_sync
|
||||
end subroutine d_oacc_set_sync
|
||||
|
||||
function d_oacc_is_dev(x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_dev)
|
||||
end function d_oacc_is_dev
|
||||
|
||||
function d_oacc_is_host(x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_host)
|
||||
end function d_oacc_is_host
|
||||
|
||||
function d_oacc_is_sync(x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_sync)
|
||||
end function d_oacc_is_sync
|
||||
|
||||
subroutine d_oacc_vect_all(n, x, info)
|
||||
use psi_serial_mod
|
||||
use psb_realloc_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
class(psb_d_vect_oacc), intent(out) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
call psb_realloc(n, x%v, info)
|
||||
if (info == 0) then
|
||||
call x%set_host()
|
||||
!$acc enter data create(x%v)
|
||||
call x%sync_space()
|
||||
end if
|
||||
if (info /= 0) then
|
||||
info = psb_err_alloc_request_
|
||||
call psb_errpush(info, 'd_oacc_all', &
|
||||
i_err=(/n, n, n, n, n/))
|
||||
end if
|
||||
end subroutine d_oacc_vect_all
|
||||
|
||||
|
||||
subroutine d_oacc_vect_free(x, info)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
info = 0
|
||||
if (allocated(x%v)) then
|
||||
!$acc exit data delete(x%v) finalize
|
||||
deallocate(x%v, stat=info)
|
||||
end if
|
||||
|
||||
end subroutine d_oacc_vect_free
|
||||
|
||||
function oacc_get_size(x) result(res)
|
||||
implicit none
|
||||
class(psb_d_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: res
|
||||
|
||||
if (x%is_dev()) call x%sync()
|
||||
res = size(x%v)
|
||||
end function oacc_get_size
|
||||
|
||||
|
||||
subroutine initialize(N)
|
||||
integer(psb_ipk_) :: N
|
||||
integer(psb_ipk_) :: i
|
||||
allocate(v1(N),v2(N),p(N))
|
||||
!$acc enter data create(v1,v2,p)
|
||||
!$acc parallel
|
||||
!$acc loop
|
||||
do i=1,n
|
||||
v1(i) = i
|
||||
v2(i) = n+i
|
||||
end do
|
||||
!$acc end parallel
|
||||
end subroutine initialize
|
||||
subroutine finalize_dev()
|
||||
!$acc exit data delete(v1,v2,p)
|
||||
end subroutine finalize_dev
|
||||
subroutine finalize_host()
|
||||
deallocate(v1,v2,p)
|
||||
end subroutine finalize_host
|
||||
subroutine to_dev()
|
||||
!$acc update device(v1,v2)
|
||||
end subroutine to_dev
|
||||
subroutine to_host()
|
||||
!$acc update self(v1,v2)
|
||||
end subroutine to_host
|
||||
function d_dot(N) result(res)
|
||||
real(kind(1.d0)) :: res
|
||||
integer(psb_ipk_) :: i,N
|
||||
real(kind(1.d0)) :: t1,t2,t3
|
||||
res = 0.0d0
|
||||
!$acc parallel
|
||||
!$acc loop reduction(+:res)
|
||||
do i=1,N
|
||||
res = res + v1(i) * v2(i)
|
||||
end do
|
||||
!$acc end parallel
|
||||
|
||||
end function d_dot
|
||||
function h_dot(N) result(res)
|
||||
integer(psb_ipk_) :: i,N
|
||||
real(kind(1.d0)) :: t1,t2,t3,res
|
||||
res = 0.0d0
|
||||
do i=1,N
|
||||
res = res + v1(i) * v2(i)
|
||||
end do
|
||||
end function h_dot
|
||||
|
||||
end module psb_d_oacc_vect_mod
|
@ -0,0 +1,455 @@
|
||||
module psb_i_oacc_vect_mod
|
||||
use iso_c_binding
|
||||
use psb_const_mod
|
||||
use psb_error_mod
|
||||
use psb_i_vect_mod
|
||||
|
||||
integer(psb_ipk_), parameter, private :: is_host = -1
|
||||
integer(psb_ipk_), parameter, private :: is_sync = 0
|
||||
integer(psb_ipk_), parameter, private :: is_dev = 1
|
||||
|
||||
type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc
|
||||
integer :: state = is_host
|
||||
contains
|
||||
procedure, pass(x) :: get_nrows => i_oacc_get_nrows
|
||||
procedure, nopass :: get_fmt => i_oacc_get_fmt
|
||||
|
||||
procedure, pass(x) :: all => i_oacc_all
|
||||
procedure, pass(x) :: zero => i_oacc_zero
|
||||
procedure, pass(x) :: asb_m => i_oacc_asb_m
|
||||
procedure, pass(x) :: sync => i_oacc_sync
|
||||
procedure, pass(x) :: sync_space => i_oacc_sync_space
|
||||
procedure, pass(x) :: bld_x => i_oacc_bld_x
|
||||
procedure, pass(x) :: bld_mn => i_oacc_bld_mn
|
||||
procedure, pass(x) :: free => i_oacc_free
|
||||
procedure, pass(x) :: ins_a => i_oacc_ins_a
|
||||
procedure, pass(x) :: ins_v => i_oacc_ins_v
|
||||
procedure, pass(x) :: is_host => i_oacc_is_host
|
||||
procedure, pass(x) :: is_dev => i_oacc_is_dev
|
||||
procedure, pass(x) :: is_sync => i_oacc_is_sync
|
||||
procedure, pass(x) :: set_host => i_oacc_set_host
|
||||
procedure, pass(x) :: set_dev => i_oacc_set_dev
|
||||
procedure, pass(x) :: set_sync => i_oacc_set_sync
|
||||
procedure, pass(x) :: set_scal => i_oacc_set_scal
|
||||
procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
|
||||
procedure, pass(y) :: sctb => i_oacc_sctb
|
||||
procedure, pass(y) :: sctb_x => i_oacc_sctb_x
|
||||
procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
|
||||
procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
|
||||
|
||||
final :: i_oacc_vect_finalize
|
||||
end type psb_i_vect_oacc
|
||||
|
||||
public :: psb_i_vect_oacc_
|
||||
private :: constructor
|
||||
interface psb_i_vect_oacc_
|
||||
module procedure constructor
|
||||
end interface psb_i_vect_oacc_
|
||||
|
||||
contains
|
||||
|
||||
function constructor(x) result(this)
|
||||
integer(psb_ipk_) :: x(:)
|
||||
type(psb_i_vect_oacc) :: this
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
this%v = x
|
||||
call this%asb(size(x), info)
|
||||
end function constructor
|
||||
|
||||
|
||||
subroutine i_oacc_gthzv_x(i, n, idx, x, y)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
integer(psb_ipk_) :: y(:)
|
||||
class(psb_i_vect_oacc) :: x
|
||||
integer :: info
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y(i) = x%v(idx%v(i))
|
||||
end do
|
||||
end subroutine i_oacc_gthzv_x
|
||||
|
||||
subroutine i_oacc_gthzbuf(i, n, idx, x)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
class(psb_i_vect_oacc) :: x
|
||||
integer :: info
|
||||
|
||||
if (.not.allocated(x%combuf)) then
|
||||
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
|
||||
return
|
||||
end if
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
x%combuf(i) = x%v(idx%v(i))
|
||||
end do
|
||||
end subroutine i_oacc_gthzbuf
|
||||
|
||||
subroutine i_oacc_sctb(n, idx, x, beta, y)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: n, idx(:)
|
||||
integer(psb_ipk_) :: beta, x(:)
|
||||
class(psb_i_vect_oacc) :: y
|
||||
integer(psb_ipk_) :: info
|
||||
integer :: i
|
||||
|
||||
if (n == 0) return
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
|
||||
end do
|
||||
end subroutine i_oacc_sctb
|
||||
|
||||
subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
integer(psb_ipk_) :: beta, x(:)
|
||||
class(psb_i_vect_oacc) :: y
|
||||
integer :: info
|
||||
|
||||
select type(ii => idx)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (ii%is_host()) call ii%sync_space(info)
|
||||
if (y%is_host()) call y%sync_space(info)
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i)
|
||||
end do
|
||||
|
||||
class default
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
|
||||
end do
|
||||
end select
|
||||
end subroutine i_oacc_sctb_x
|
||||
|
||||
subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: i, n
|
||||
class(psb_i_base_vect_type) :: idx
|
||||
integer(psb_ipk_) :: beta
|
||||
class(psb_i_vect_oacc) :: y
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
if (.not.allocated(y%v)) then
|
||||
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
|
||||
return
|
||||
end if
|
||||
|
||||
!$acc parallel loop
|
||||
do i = 1, n
|
||||
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i)
|
||||
end do
|
||||
end subroutine i_oacc_sctb_buf
|
||||
|
||||
subroutine i_oacc_set_host(x)
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
x%state = is_host
|
||||
end subroutine i_oacc_set_host
|
||||
|
||||
subroutine i_oacc_set_sync(x)
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
x%state = is_sync
|
||||
end subroutine i_oacc_set_sync
|
||||
|
||||
subroutine i_oacc_set_dev(x)
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
x%state = is_dev
|
||||
end subroutine i_oacc_set_dev
|
||||
|
||||
subroutine i_oacc_set_scal(x, val, first, last)
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: val
|
||||
integer(psb_ipk_), optional :: first, last
|
||||
|
||||
integer(psb_ipk_) :: first_, last_
|
||||
|
||||
first_ = 1
|
||||
last_ = size(x%v)
|
||||
if (present(first)) first_ = max(1, first)
|
||||
if (present(last)) last_ = min(size(x%v), last)
|
||||
|
||||
!$acc parallel loop
|
||||
do i = first_, last_
|
||||
x%v(i) = val
|
||||
end do
|
||||
call x%set_dev()
|
||||
end subroutine i_oacc_set_scal
|
||||
|
||||
function i_oacc_is_host(x) result(res)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_host)
|
||||
end function i_oacc_is_host
|
||||
|
||||
function i_oacc_is_dev(x) result(res)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_dev)
|
||||
end function i_oacc_is_dev
|
||||
|
||||
function i_oacc_is_sync(x) result(res)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(in) :: x
|
||||
logical :: res
|
||||
|
||||
res = (x%state == is_sync)
|
||||
end function i_oacc_is_sync
|
||||
|
||||
subroutine i_oacc_free(x, info)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = 0
|
||||
if (allocated(x%v)) deallocate(x%v, stat=info)
|
||||
if (info /= 0) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, 'i_oacc_free')
|
||||
end if
|
||||
call x%set_sync()
|
||||
end subroutine i_oacc_free
|
||||
|
||||
subroutine i_oacc_ins_a(n, irl, val, dupl, x, info)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n, dupl
|
||||
integer(psb_ipk_), intent(in) :: irl(:)
|
||||
integer(psb_ipk_), intent(in) :: val(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
info = 0
|
||||
if (x%is_dev()) call x%sync()
|
||||
call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
|
||||
call x%set_host()
|
||||
end subroutine i_oacc_ins_a
|
||||
|
||||
subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(in) :: n, dupl
|
||||
class(psb_i_base_vect_type), intent(inout) :: irl
|
||||
class(psb_i_base_vect_type), intent(inout) :: val
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i, isz
|
||||
logical :: done_oacc
|
||||
|
||||
info = 0
|
||||
if (psb_errstatus_fatal()) return
|
||||
|
||||
done_oacc = .false.
|
||||
select type(virl => irl)
|
||||
class is (psb_i_vect_oacc)
|
||||
select type(vval => val)
|
||||
class is (psb_i_vect_oacc)
|
||||
if (vval%is_host()) call vval%sync()
|
||||
if (virl%is_host()) call virl%sync()
|
||||
if (x%is_host()) call x%sync()
|
||||
! Add the OpenACC kernel call here if needed
|
||||
call x%set_dev()
|
||||
done_oacc = .true.
|
||||
end select
|
||||
end select
|
||||
|
||||
if (.not.done_oacc) then
|
||||
if (irl%is_dev()) call irl%sync()
|
||||
if (val%is_dev()) call val%sync()
|
||||
call x%ins(n, irl%v, val%v, dupl, info)
|
||||
end if
|
||||
|
||||
if (info /= 0) then
|
||||
call psb_errpush(info,'i_oacc_ins_v')
|
||||
return
|
||||
end if
|
||||
end subroutine i_oacc_ins_v
|
||||
|
||||
subroutine i_oacc_bld_x(x, this)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: this(:)
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
call psb_realloc(size(this), x%v, info)
|
||||
if (info /= 0) then
|
||||
info = psb_err_alloc_request_
|
||||
call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/))
|
||||
end if
|
||||
x%v(:) = this(:)
|
||||
call x%set_host()
|
||||
call x%sync()
|
||||
end subroutine i_oacc_bld_x
|
||||
|
||||
subroutine i_oacc_bld_mn(x, n)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
integer(psb_mpk_), intent(in) :: n
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
call x%all(n, info)
|
||||
if (info /= 0) then
|
||||
call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/))
|
||||
end if
|
||||
end subroutine i_oacc_bld_mn
|
||||
|
||||
subroutine i_oacc_sync(x)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: n, info
|
||||
|
||||
info = 0
|
||||
if (x%is_host()) then
|
||||
n = size(x%v)
|
||||
if (.not.allocated(x%v)) then
|
||||
write(*, *) 'Incoherent situation : x%v not allocated'
|
||||
call psb_realloc(n, x%v, info)
|
||||
end if
|
||||
if ((n > size(x%v)) .or. (n > x%get_nrows())) then
|
||||
write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows()
|
||||
call psb_realloc(n, x%v, info)
|
||||
end if
|
||||
!$acc update device(x%v)
|
||||
else if (x%is_dev()) then
|
||||
n = size(x%v)
|
||||
if (.not.allocated(x%v)) then
|
||||
write(*, *) 'Incoherent situation : x%v not allocated'
|
||||
call psb_realloc(n, x%v, info)
|
||||
end if
|
||||
!$acc update self(x%v)
|
||||
end if
|
||||
if (info == 0) call x%set_sync()
|
||||
if (info /= 0) then
|
||||
info = psb_err_internal_error_
|
||||
call psb_errpush(info, 'i_oacc_sync')
|
||||
end if
|
||||
end subroutine i_oacc_sync
|
||||
|
||||
subroutine i_oacc_sync_space(x, info)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: nh, nd
|
||||
|
||||
info = 0
|
||||
if (x%is_dev()) then
|
||||
nh = size(x%v)
|
||||
nd = nh
|
||||
if (nh < nd) then
|
||||
call psb_realloc(nd, x%v, info)
|
||||
end if
|
||||
else
|
||||
nh = size(x%v)
|
||||
nd = nh
|
||||
if (nh < nd) then
|
||||
call psb_realloc(nd, x%v, info)
|
||||
end if
|
||||
end if
|
||||
end subroutine i_oacc_sync_space
|
||||
|
||||
function i_oacc_get_nrows(x) result(res)
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(in) :: x
|
||||
integer(psb_ipk_) :: res
|
||||
|
||||
res = 0
|
||||
if (allocated(x%v)) res = size(x%v)
|
||||
end function i_oacc_get_nrows
|
||||
|
||||
function i_oacc_get_fmt() result(res)
|
||||
implicit none
|
||||
character(len=5) :: res
|
||||
res = 'iOACC'
|
||||
end function i_oacc_get_fmt
|
||||
|
||||
subroutine i_oacc_all(n, x, info)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(out) :: x
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
call psb_realloc(n, x%v, info)
|
||||
if (info == 0) call x%set_host()
|
||||
if (info == 0) call x%sync_space(info)
|
||||
if (info /= 0) then
|
||||
info = psb_err_alloc_request_
|
||||
call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/))
|
||||
end if
|
||||
end subroutine i_oacc_all
|
||||
|
||||
subroutine i_oacc_zero(x)
|
||||
use psb_error_mod
|
||||
implicit none
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
! Ensure zeroing on the GPU side
|
||||
call x%set_dev()
|
||||
x%v = 0
|
||||
!$acc update device(x%v)
|
||||
end subroutine i_oacc_zero
|
||||
|
||||
subroutine i_oacc_asb_m(n, x, info)
|
||||
use psb_error_mod
|
||||
use psb_realloc_mod
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
class(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: nh, nd
|
||||
|
||||
info = 0
|
||||
if (x%is_dev()) then
|
||||
nd = size(x%v)
|
||||
if (nd < n) then
|
||||
call x%sync()
|
||||
call x%psb_i_base_vect_type%asb(n, info)
|
||||
if (info == psb_success_) call x%sync_space(info)
|
||||
call x%set_host()
|
||||
end if
|
||||
else
|
||||
nh = size(x%v)
|
||||
if (nh < n) then
|
||||
call x%psb_i_base_vect_type%asb(n, info)
|
||||
if (info == psb_success_) call x%sync_space(info)
|
||||
call x%set_host()
|
||||
end if
|
||||
end if
|
||||
end subroutine i_oacc_asb_m
|
||||
|
||||
subroutine i_oacc_vect_finalize(x)
|
||||
use psi_serial_mod
|
||||
use psb_realloc_mod
|
||||
implicit none
|
||||
type(psb_i_vect_oacc), intent(inout) :: x
|
||||
integer(psb_ipk_) :: info
|
||||
|
||||
info = 0
|
||||
call x%free(info)
|
||||
end subroutine i_oacc_vect_finalize
|
||||
|
||||
end module psb_i_oacc_vect_mod
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,7 @@
|
||||
module psb_oacc_mod
|
||||
use psb_const_mod
|
||||
|
||||
use psb_d_oacc_vect_mod
|
||||
use psb_d_oacc_csr_mat_mod
|
||||
|
||||
end module psb_oacc_mod
|
@ -0,0 +1,53 @@
|
||||
TOPDIR=../..
|
||||
include $(TOPDIR)/Make.inc
|
||||
|
||||
# Directories
|
||||
LIBDIR=$(TOPDIR)/lib/
|
||||
PSBLIBDIR=$(TOPDIR)/lib/
|
||||
PSBINCDIR=$(TOPDIR)/include
|
||||
PSBMODDIR=$(TOPDIR)/modules
|
||||
INCDIR=$(TOPDIR)/include
|
||||
MODDIR=$(TOPDIR)/modules
|
||||
EXEDIR=./runs
|
||||
|
||||
# Libraries
|
||||
PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis
|
||||
LDLIBS=$(PSBGPULDLIBS)
|
||||
|
||||
# Includes
|
||||
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
|
||||
|
||||
# Compiler flags
|
||||
FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
|
||||
CFLAGS=-O0 -march=native
|
||||
|
||||
# Source files
|
||||
SRCS=test.F90 vectoacc.F90
|
||||
CSRC=timers.c
|
||||
|
||||
# Object files
|
||||
OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
|
||||
|
||||
# Default rule
|
||||
all: dir
|
||||
|
||||
dir:
|
||||
@if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi
|
||||
|
||||
# Pattern rule for creating executables
|
||||
%: %.o timers.o
|
||||
$(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
|
||||
/bin/mv $@ $(EXEDIR)
|
||||
|
||||
# Compilation rules
|
||||
%.o: %.F90
|
||||
$(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@
|
||||
|
||||
%.o: %.c
|
||||
$(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@
|
||||
|
||||
clean:
|
||||
/bin/rm -fr *.o *.mod $(EXEDIR)/*
|
||||
|
||||
# Phony targets
|
||||
.PHONY: all dir clean
|
@ -0,0 +1,617 @@
|
||||
module psb_d_pde3d_mod
|
||||
|
||||
|
||||
use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
|
||||
& psb_dspmat_type, psb_d_vect_type, dzero,&
|
||||
& psb_d_base_sparse_mat, psb_d_base_vect_type, &
|
||||
& psb_i_base_vect_type, psb_l_base_vect_type
|
||||
|
||||
interface
|
||||
function d_func_3d(x,y,z) result(val)
|
||||
import :: psb_dpk_
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
real(psb_dpk_) :: val
|
||||
end function d_func_3d
|
||||
end interface
|
||||
|
||||
interface psb_gen_pde3d
|
||||
module procedure psb_d_gen_pde3d
|
||||
end interface psb_gen_pde3d
|
||||
|
||||
contains
|
||||
|
||||
function d_null_func_3d(x,y,z) result(val)
|
||||
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
real(psb_dpk_) :: val
|
||||
|
||||
val = dzero
|
||||
|
||||
end function d_null_func_3d
|
||||
!
|
||||
! functions parametrizing the differential equation
|
||||
!
|
||||
|
||||
!
|
||||
! Note: b1, b2 and b3 are the coefficients of the first
|
||||
! derivative of the unknown function. The default
|
||||
! we apply here is to have them zero, so that the resulting
|
||||
! matrix is symmetric/hermitian and suitable for
|
||||
! testing with CG and FCG.
|
||||
! When testing methods for non-hermitian matrices you can
|
||||
! change the B1/B2/B3 functions to e.g. done/sqrt((3*done))
|
||||
!
|
||||
function b1(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: b1
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
b1=done/sqrt((3*done))
|
||||
end function b1
|
||||
function b2(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: b2
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
b2=done/sqrt((3*done))
|
||||
end function b2
|
||||
function b3(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: b3
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
b3=done/sqrt((3*done))
|
||||
end function b3
|
||||
function c(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: c
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
c=dzero
|
||||
end function c
|
||||
function a1(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: a1
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
a1=done/80
|
||||
end function a1
|
||||
function a2(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: a2
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
a2=done/80
|
||||
end function a2
|
||||
function a3(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: a3
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
a3=done/80
|
||||
end function a3
|
||||
function g(x,y,z)
|
||||
use psb_base_mod, only : psb_dpk_, done, dzero
|
||||
implicit none
|
||||
real(psb_dpk_) :: g
|
||||
real(psb_dpk_), intent(in) :: x,y,z
|
||||
g = dzero
|
||||
if (x == done) then
|
||||
g = done
|
||||
else if (x == dzero) then
|
||||
g = exp(y**2-z**2)
|
||||
end if
|
||||
end function g
|
||||
|
||||
|
||||
!
|
||||
! subroutine to allocate and fill in the coefficient matrix and
|
||||
! the rhs.
|
||||
!
|
||||
subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
|
||||
& f,amold,vmold,imold,partition,nrl,iv,tnd)
|
||||
use psb_base_mod
|
||||
use psb_util_mod
|
||||
!
|
||||
! Discretizes the partial differential equation
|
||||
!
|
||||
! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u)
|
||||
! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f
|
||||
! dxdx dydy dzdz dx dy dz
|
||||
!
|
||||
! with Dirichlet boundary conditions
|
||||
! u = g
|
||||
!
|
||||
! on the unit cube 0<=x,y,z<=1.
|
||||
!
|
||||
!
|
||||
! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
|
||||
!
|
||||
implicit none
|
||||
integer(psb_ipk_) :: idim
|
||||
type(psb_dspmat_type) :: a
|
||||
type(psb_d_vect_type) :: xv,bv
|
||||
type(psb_desc_type) :: desc_a
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_ipk_) :: info
|
||||
character(len=*) :: afmt
|
||||
procedure(d_func_3d), optional :: f
|
||||
class(psb_d_base_sparse_mat), optional :: amold
|
||||
class(psb_d_base_vect_type), optional :: vmold
|
||||
class(psb_i_base_vect_type), optional :: imold
|
||||
integer(psb_ipk_), optional :: partition, nrl,iv(:)
|
||||
logical, optional :: tnd
|
||||
! Local variables.
|
||||
|
||||
integer(psb_ipk_), parameter :: nb=20
|
||||
type(psb_d_csc_sparse_mat) :: acsc
|
||||
type(psb_d_coo_sparse_mat) :: acoo
|
||||
type(psb_d_csr_sparse_mat) :: acsr
|
||||
real(psb_dpk_) :: zt(nb),x,y,z
|
||||
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
|
||||
integer(psb_lpk_) :: m,n,glob_row,nt
|
||||
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
|
||||
! For 3D partition
|
||||
! Note: integer control variables going directly into an MPI call
|
||||
! must be 4 bytes, i.e. psb_mpk_
|
||||
integer(psb_mpk_) :: npdims(3), npp, minfo
|
||||
integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
|
||||
integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
|
||||
! Process grid
|
||||
integer(psb_ipk_) :: np, iam
|
||||
integer(psb_ipk_) :: icoeff
|
||||
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
|
||||
real(psb_dpk_), allocatable :: val(:)
|
||||
! deltah dimension of each grid cell
|
||||
! deltat discretization time
|
||||
real(psb_dpk_) :: deltah, sqdeltah, deltah2
|
||||
real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero
|
||||
real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb
|
||||
integer(psb_ipk_) :: err_act
|
||||
procedure(d_func_3d), pointer :: f_
|
||||
logical :: tnd_
|
||||
character(len=20) :: name, ch_err,tmpfmt
|
||||
|
||||
info = psb_success_
|
||||
name = 'create_matrix'
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
call psb_info(ctxt, iam, np)
|
||||
|
||||
|
||||
if (present(f)) then
|
||||
f_ => f
|
||||
else
|
||||
f_ => d_null_func_3d
|
||||
end if
|
||||
|
||||
deltah = done/(idim+2)
|
||||
sqdeltah = deltah*deltah
|
||||
deltah2 = (2*done)* deltah
|
||||
|
||||
if (present(partition)) then
|
||||
if ((1<= partition).and.(partition <= 3)) then
|
||||
partition_ = partition
|
||||
else
|
||||
write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
|
||||
partition_ = 3
|
||||
end if
|
||||
else
|
||||
partition_ = 3
|
||||
end if
|
||||
|
||||
! initialize array descriptor and sparse matrix storage. provide an
|
||||
! estimate of the number of non zeroes
|
||||
|
||||
m = (1_psb_lpk_*idim)*idim*idim
|
||||
n = m
|
||||
nnz = ((n*7)/(np))
|
||||
if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
|
||||
t0 = psb_wtime()
|
||||
select case(partition_)
|
||||
case(1)
|
||||
! A BLOCK partition
|
||||
if (present(nrl)) then
|
||||
nr = nrl
|
||||
else
|
||||
!
|
||||
! Using a simple BLOCK distribution.
|
||||
!
|
||||
nt = (m+np-1)/np
|
||||
nr = max(0,min(nt,m-(iam*nt)))
|
||||
end if
|
||||
|
||||
nt = nr
|
||||
call psb_sum(ctxt,nt)
|
||||
if (nt /= m) then
|
||||
write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
|
||||
info = -1
|
||||
call psb_barrier(ctxt)
|
||||
call psb_abort(ctxt)
|
||||
return
|
||||
end if
|
||||
|
||||
!
|
||||
! First example of use of CDALL: specify for each process a number of
|
||||
! contiguous rows
|
||||
!
|
||||
call psb_cdall(ctxt,desc_a,info,nl=nr)
|
||||
myidx = desc_a%get_global_indices()
|
||||
nlr = size(myidx)
|
||||
|
||||
case(2)
|
||||
! A partition defined by the user through IV
|
||||
|
||||
if (present(iv)) then
|
||||
if (size(iv) /= m) then
|
||||
write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
|
||||
info = -1
|
||||
call psb_barrier(ctxt)
|
||||
call psb_abort(ctxt)
|
||||
return
|
||||
end if
|
||||
else
|
||||
write(psb_err_unit,*) iam, 'Initialization error: IV not present'
|
||||
info = -1
|
||||
call psb_barrier(ctxt)
|
||||
call psb_abort(ctxt)
|
||||
return
|
||||
end if
|
||||
|
||||
!
|
||||
! Second example of use of CDALL: specify for each row the
|
||||
! process that owns it
|
||||
!
|
||||
call psb_cdall(ctxt,desc_a,info,vg=iv)
|
||||
myidx = desc_a%get_global_indices()
|
||||
nlr = size(myidx)
|
||||
|
||||
case(3)
|
||||
! A 3-dimensional partition
|
||||
|
||||
! A nifty MPI function will split the process list
|
||||
npdims = 0
|
||||
call mpi_dims_create(np,3,npdims,info)
|
||||
npx = npdims(1)
|
||||
npy = npdims(2)
|
||||
npz = npdims(3)
|
||||
|
||||
allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
|
||||
! We can reuse idx2ijk for process indices as well.
|
||||
call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
|
||||
! Now let's split the 3D cube in hexahedra
|
||||
call dist1Didx(bndx,idim,npx)
|
||||
mynx = bndx(iamx+1)-bndx(iamx)
|
||||
call dist1Didx(bndy,idim,npy)
|
||||
myny = bndy(iamy+1)-bndy(iamy)
|
||||
call dist1Didx(bndz,idim,npz)
|
||||
mynz = bndz(iamz+1)-bndz(iamz)
|
||||
|
||||
! How many indices do I own?
|
||||
nlr = mynx*myny*mynz
|
||||
allocate(myidx(nlr))
|
||||
! Now, let's generate the list of indices I own
|
||||
nr = 0
|
||||
do i=bndx(iamx),bndx(iamx+1)-1
|
||||
do j=bndy(iamy),bndy(iamy+1)-1
|
||||
do k=bndz(iamz),bndz(iamz+1)-1
|
||||
nr = nr + 1
|
||||
call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
if (nr /= nlr) then
|
||||
write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
|
||||
& nr,nlr,mynx,myny,mynz
|
||||
info = -1
|
||||
call psb_barrier(ctxt)
|
||||
call psb_abort(ctxt)
|
||||
end if
|
||||
|
||||
!
|
||||
! Third example of use of CDALL: specify for each process
|
||||
! the set of global indices it owns.
|
||||
!
|
||||
call psb_cdall(ctxt,desc_a,info,vl=myidx)
|
||||
|
||||
case default
|
||||
write(psb_err_unit,*) iam, 'Initialization error: should not get here'
|
||||
info = -1
|
||||
call psb_barrier(ctxt)
|
||||
call psb_abort(ctxt)
|
||||
return
|
||||
end select
|
||||
|
||||
|
||||
if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,&
|
||||
& dupl=psb_dupl_err_)
|
||||
! define rhs from boundary conditions; also build initial guess
|
||||
if (info == psb_success_) call psb_geall(xv,desc_a,info)
|
||||
if (info == psb_success_) call psb_geall(bv,desc_a,info)
|
||||
|
||||
call psb_barrier(ctxt)
|
||||
talc = psb_wtime()-t0
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='allocation rout.'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
! we build an auxiliary matrix consisting of one row at a
|
||||
! time; just a small matrix. might be extended to generate
|
||||
! a bunch of rows per call.
|
||||
!
|
||||
allocate(val(20*nb),irow(20*nb),&
|
||||
&icol(20*nb),stat=info)
|
||||
if (info /= psb_success_ ) then
|
||||
info=psb_err_alloc_dealloc_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
|
||||
! loop over rows belonging to current process in a block
|
||||
! distribution.
|
||||
|
||||
call psb_barrier(ctxt)
|
||||
t1 = psb_wtime()
|
||||
do ii=1, nlr,nb
|
||||
ib = min(nb,nlr-ii+1)
|
||||
icoeff = 1
|
||||
do k=1,ib
|
||||
i=ii+k-1
|
||||
! local matrix pointer
|
||||
glob_row=myidx(i)
|
||||
! compute gridpoint coordinates
|
||||
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
|
||||
! x, y, z coordinates
|
||||
x = (ix-1)*deltah
|
||||
y = (iy-1)*deltah
|
||||
z = (iz-1)*deltah
|
||||
zt(k) = f_(x,y,z)
|
||||
! internal point: build discretization
|
||||
!
|
||||
! term depending on (x-1,y,z)
|
||||
!
|
||||
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
|
||||
if (ix == 1) then
|
||||
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
! term depending on (x,y-1,z)
|
||||
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
|
||||
if (iy == 1) then
|
||||
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
! term depending on (x,y,z-1)
|
||||
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
|
||||
if (iz == 1) then
|
||||
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
|
||||
! term depending on (x,y,z)
|
||||
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
|
||||
& + c(x,y,z)
|
||||
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
! term depending on (x,y,z+1)
|
||||
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
|
||||
if (iz == idim) then
|
||||
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
! term depending on (x,y+1,z)
|
||||
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
|
||||
if (iy == idim) then
|
||||
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
! term depending on (x+1,y,z)
|
||||
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
|
||||
if (ix==idim) then
|
||||
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
|
||||
else
|
||||
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
|
||||
irow(icoeff) = glob_row
|
||||
icoeff = icoeff+1
|
||||
endif
|
||||
|
||||
end do
|
||||
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
|
||||
if(info /= psb_success_) exit
|
||||
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
|
||||
if(info /= psb_success_) exit
|
||||
zt(:)=dzero
|
||||
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
|
||||
if(info /= psb_success_) exit
|
||||
end do
|
||||
|
||||
tgen = psb_wtime()-t1
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='insert rout.'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
deallocate(val,irow,icol)
|
||||
|
||||
call psb_barrier(ctxt)
|
||||
t1 = psb_wtime()
|
||||
call psb_cdasb(desc_a,info,mold=imold)
|
||||
tcdasb = psb_wtime()-t1
|
||||
call psb_barrier(ctxt)
|
||||
t1 = psb_wtime()
|
||||
if (info == psb_success_) then
|
||||
if (present(amold)) then
|
||||
call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd)
|
||||
else
|
||||
call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd)
|
||||
end if
|
||||
end if
|
||||
call psb_barrier(ctxt)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='asb rout.'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
|
||||
if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
|
||||
if(info /= psb_success_) then
|
||||
info=psb_err_from_subroutine_
|
||||
ch_err='asb rout.'
|
||||
call psb_errpush(info,name,a_err=ch_err)
|
||||
goto 9999
|
||||
end if
|
||||
tasb = psb_wtime()-t1
|
||||
call psb_barrier(ctxt)
|
||||
ttot = psb_wtime() - t0
|
||||
|
||||
call psb_amx(ctxt,talc)
|
||||
call psb_amx(ctxt,tgen)
|
||||
call psb_amx(ctxt,tasb)
|
||||
call psb_amx(ctxt,ttot)
|
||||
if(iam == psb_root_) then
|
||||
tmpfmt = a%get_fmt()
|
||||
write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
|
||||
& tmpfmt
|
||||
write(psb_out_unit,'("-allocation time : ",es12.5)') talc
|
||||
write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
|
||||
write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
|
||||
write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
|
||||
write(psb_out_unit,'("-total time : ",es12.5)') ttot
|
||||
|
||||
end if
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(ctxt,err_act)
|
||||
|
||||
return
|
||||
end subroutine psb_d_gen_pde3d
|
||||
|
||||
|
||||
end module psb_d_pde3d_mod
|
||||
|
||||
|
||||
|
||||
program test
|
||||
use psb_base_mod
|
||||
use psb_ext_mod
|
||||
use psb_oacc_mod
|
||||
use psb_d_pde3d_mod
|
||||
|
||||
implicit none
|
||||
integer(psb_ipk_) :: n, i, info, m, nrm, nz
|
||||
integer(psb_ipk_), parameter :: ntests=80, ngpu=20
|
||||
real(psb_dpk_) :: dot_dev, dot_host
|
||||
type(psb_d_vect_oacc) :: tx, ty
|
||||
type(psb_d_oacc_csr_sparse_mat) :: aacsr
|
||||
real(psb_dpk_) :: t0, t1, t2, t3, csflp, elflp
|
||||
double precision, external :: etime
|
||||
|
||||
type(psb_dspmat_type) :: a
|
||||
type(psb_desc_type) :: desc_a
|
||||
type(psb_d_vect_type) :: xxv, bv
|
||||
type(psb_d_csr_sparse_mat) :: acsr
|
||||
character(len=5) :: afmt='csr'
|
||||
real(psb_dpk_), allocatable :: vv(:), ydev(:), yhost(:)
|
||||
type(psb_ctxt_type) :: ctxt
|
||||
integer(psb_ipk_) :: iam, np, nth, idim
|
||||
integer(psb_epk_) :: neq
|
||||
|
||||
call psb_init(ctxt)
|
||||
call psb_info(ctxt, iam, np)
|
||||
|
||||
write(*,*) 'Enter size :'
|
||||
read(*,*) idim
|
||||
idim = max(1, idim)
|
||||
|
||||
n = idim**3
|
||||
call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info)
|
||||
call a%cp_to(acsr)
|
||||
m = acsr%get_nrows()
|
||||
n = acsr%get_ncols()
|
||||
nz = acsr%get_nzeros()
|
||||
call aacsr%all(m, n, nz, info)
|
||||
aacsr%val = (acsr%val)
|
||||
aacsr%ja = (acsr%ja)
|
||||
aacsr%irp = (acsr%irp)
|
||||
call aacsr%set_host()
|
||||
call aacsr%sync()
|
||||
|
||||
call initialize(n)
|
||||
|
||||
call to_host()
|
||||
t2 = etime()
|
||||
do i = 1, ntests
|
||||
dot_host = h_dot(n)
|
||||
end do
|
||||
t3 = etime()
|
||||
|
||||
call tx%all(n, info)
|
||||
call ty%all(n, info)
|
||||
vv = bv%get_vect()
|
||||
call bv%set_vect(v1)
|
||||
call tx%set_vect(v1)
|
||||
call ty%set_vect(v2)
|
||||
t0 = etime()
|
||||
do i = 1, ntests * ngpu
|
||||
dot_dev = tx%dot_v(n, ty)
|
||||
end do
|
||||
!$acc wait
|
||||
t1 = etime()
|
||||
write(*,*) ' Dot Results : dev:', dot_dev, ' host:', dot_host
|
||||
write(*,*) ' Timing : dev:', t1 - t0, (t1 - t0) / (ntests * ngpu), &
|
||||
' host:', t3 - t2, (t3 - t2) / ntests
|
||||
|
||||
call a%mv_from(acsr)
|
||||
t2 = etime()
|
||||
do i = 1, ntests
|
||||
call a%spmm(done, bv, dzero, xxv, info)
|
||||
end do
|
||||
t3 = etime()
|
||||
yhost = xxv%get_vect()
|
||||
t0 = etime()
|
||||
do i = 1, ntests * ngpu
|
||||
call aacsr%vect_mv(done, tx, dzero, ty, info)
|
||||
end do
|
||||
!$acc wait
|
||||
t1 = etime()
|
||||
ydev = ty%get_vect()
|
||||
write(*,*) 'Correctness check: ', maxval(abs(ydev(:) - yhost(:)))
|
||||
write(*,*) ' CSR PROD '
|
||||
write(*, '(2(a,f12.3,2x))') ' Timing (ms): '
|
||||
csflp = 2.d0 * nz / ((t1 - t0) / (ntests * ngpu))
|
||||
write(*, '(2(a,f12.3,2x))') ' dev:', 1e3 * (t1 - t0) / (ntests * ngpu), ' :', csflp / 1.d6
|
||||
csflp = 2.d0 * nz / ((t3 - t2) / (ntests))
|
||||
write(*, '(2(a,f12.3,2x))') ' host:', 1e3 * (t3 - t2) / ntests, ' :', csflp / 1.d6
|
||||
write(*,*) 'Done'
|
||||
|
||||
call tx%free(info)
|
||||
call ty%free(info)
|
||||
call finalize_dev()
|
||||
call finalize_host()
|
||||
call psb_exit(ctxt)
|
||||
end program test
|
@ -0,0 +1,97 @@
|
||||
#include <sys/time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
double wtime()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double timef_()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double timef()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double etime()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double etime_()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double etimef()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
double etimef_()
|
||||
{
|
||||
struct timeval tt;
|
||||
struct timezone tz;
|
||||
double temp;
|
||||
if (gettimeofday(&tt,&tz) != 0) {
|
||||
fprintf(stderr,"Fatal error for gettimeofday ??? \n");
|
||||
exit(-1);
|
||||
}
|
||||
temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
|
||||
return(temp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -0,0 +1,85 @@
|
||||
program vectoacc
|
||||
use psb_base_mod
|
||||
use psb_oacc_mod
|
||||
implicit none
|
||||
|
||||
type(psb_d_vect_oacc) :: v3, v4, v5
|
||||
integer(psb_ipk_) :: info, n, i
|
||||
real(psb_dpk_) :: alpha, beta, result
|
||||
double precision, external :: etime
|
||||
|
||||
real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev
|
||||
double precision :: time_start, time_end
|
||||
integer(psb_ipk_), parameter :: ntests=80, ngpu=20
|
||||
|
||||
write(*, *) 'Test of the vector operations with OpenACC'
|
||||
|
||||
write(*, *) 'Enter the size of the vectors'
|
||||
read(*, *) n
|
||||
alpha = 2.0
|
||||
beta = 0.5
|
||||
|
||||
call v3%all(n, info)
|
||||
call v4%all(n, info)
|
||||
call v5%all(n, info)
|
||||
|
||||
do i = 1, n
|
||||
v3%v(i) = real(i, psb_dpk_)
|
||||
v4%v(i) = real(n - i, psb_dpk_)
|
||||
end do
|
||||
|
||||
call v3%set_dev()
|
||||
call v4%set_dev()
|
||||
|
||||
call v3%scal(alpha)
|
||||
call v3%sync()
|
||||
|
||||
do i = 1, n
|
||||
if (v3%v(i) /= alpha * real(i, psb_dpk_)) then
|
||||
write(*, *) 'Scal error : index', i
|
||||
end if
|
||||
end do
|
||||
write(*, *) 'Scal test passed'
|
||||
|
||||
result = v3%dot_v(n, v4)
|
||||
call v3%sync()
|
||||
call v4%sync()
|
||||
if (result /= sum(v3%v * v4%v)) then
|
||||
write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result
|
||||
end if
|
||||
write(*, *) 'Dot_v test passed'
|
||||
|
||||
result = v3%nrm2(n)
|
||||
call v3%sync()
|
||||
if (result /= sqrt(sum(v3%v ** 2))) then
|
||||
write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
|
||||
end if
|
||||
write(*, *) 'nrm2 test passed'
|
||||
|
||||
call v3%set_host()
|
||||
call v4%set_host()
|
||||
|
||||
time_start = etime()
|
||||
do i = 1, ntests
|
||||
dot_host = sum(v3%v * v4%v)
|
||||
end do
|
||||
time_end = etime()
|
||||
t_host = (time_end - time_start) / real(ntests)
|
||||
write(*, *) 'Performance host: ', t_host, ' sec'
|
||||
|
||||
call v3%set_dev()
|
||||
call v4%set_dev()
|
||||
time_start = etime()
|
||||
do i = 1, ntests
|
||||
dot_dev = v3%dot_v(n, v4)
|
||||
end do
|
||||
!$acc wait
|
||||
time_end = etime()
|
||||
t_dev = (time_end - time_start) / real(ntests)
|
||||
write(*, *) 'Performance device: ', t_dev, ' sec'
|
||||
|
||||
call v3%free(info)
|
||||
call v4%free(info)
|
||||
call v5%free(info)
|
||||
|
||||
end program vectoacc
|
Loading…
Reference in New Issue