X_cuda_vect%abgdxyz

nond-rep
sfilippone 10 months ago
parent 93c71c4316
commit 2391f64df6

@ -922,13 +922,57 @@ contains
class(psb_c_vect_cuda), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_c_vect_cuda)
select type(yy => y)
class is (psb_c_vect_cuda)
select type(zz => z)
class is (psb_c_vect_cuda)
! Do something different here
if ((beta /= czero).and.yy%is_host())&
& call yy%sync()
if ((delta /= czero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
call z%psb_c_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info)
!!$
!!$ if (x%is_dev()) call x%sync()
!!$
!!$ call y%axpby(m,alpha,x,beta,info)
!!$ call z%axpby(m,gamma,y,delta,info)
end subroutine c_cuda_abgdxyz
@ -1512,7 +1556,7 @@ contains
!!$ complex(psb_spk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = czero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1566,13 +1610,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_c_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= czero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_c_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= czero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1817,7 +1861,7 @@ contains
implicit none
class(psb_c_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=czero
call x%set_host()
end subroutine c_cuda_multi_zero

@ -922,13 +922,57 @@ contains
class(psb_d_vect_cuda), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_d_vect_cuda)
select type(yy => y)
class is (psb_d_vect_cuda)
select type(zz => z)
class is (psb_d_vect_cuda)
! Do something different here
if ((beta /= dzero).and.yy%is_host())&
& call yy%sync()
if ((delta /= dzero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
call z%psb_d_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info)
!!$
!!$ if (x%is_dev()) call x%sync()
!!$
!!$ call y%axpby(m,alpha,x,beta,info)
!!$ call z%axpby(m,gamma,y,delta,info)
end subroutine d_cuda_abgdxyz

@ -1172,7 +1172,7 @@ contains
!!$ integer(psb_ipk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = izero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1226,13 +1226,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_i_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= izero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_i_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= izero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1477,7 +1477,7 @@ contains
implicit none
class(psb_i_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=izero
call x%set_host()
end subroutine i_cuda_multi_zero

@ -922,13 +922,57 @@ contains
class(psb_s_vect_cuda), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_s_vect_cuda)
select type(yy => y)
class is (psb_s_vect_cuda)
select type(zz => z)
class is (psb_s_vect_cuda)
! Do something different here
if ((beta /= szero).and.yy%is_host())&
& call yy%sync()
if ((delta /= szero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
call z%psb_s_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info)
!!$
!!$ if (x%is_dev()) call x%sync()
!!$
!!$ call y%axpby(m,alpha,x,beta,info)
!!$ call z%axpby(m,gamma,y,delta,info)
end subroutine s_cuda_abgdxyz
@ -1512,7 +1556,7 @@ contains
!!$ real(psb_spk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = szero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1566,13 +1610,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_s_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= szero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_s_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= szero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1817,7 +1861,7 @@ contains
implicit none
class(psb_s_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=szero
call x%set_host()
end subroutine s_cuda_multi_zero

@ -922,13 +922,57 @@ contains
class(psb_z_vect_cuda), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz
logical :: gpu_done
info = psb_success_
if (.true.) then
gpu_done = .false.
select type(xx => x)
class is (psb_z_vect_cuda)
select type(yy => y)
class is (psb_z_vect_cuda)
select type(zz => z)
class is (psb_z_vect_cuda)
! Do something different here
if ((beta /= zzero).and.yy%is_host())&
& call yy%sync()
if ((delta /= zzero).and.zz%is_host())&
& call zz%sync()
if (xx%is_host()) call xx%sync()
nx = getMultiVecDeviceSize(xx%deviceVect)
ny = getMultiVecDeviceSize(yy%deviceVect)
nz = getMultiVecDeviceSize(zz%deviceVect)
if ((nx<m).or.(ny<m).or.(nz<m)) then
info = psb_err_internal_error_
else
info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
& xx%deviceVect,yy%deviceVect,zz%deviceVect)
end if
call yy%set_dev()
call zz%set_dev()
gpu_done = .true.
end select
end select
end select
if (.not.gpu_done) then
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
else
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
if (z%is_host()) call z%sync()
call y%axpby(m,alpha,x,beta,info)
call z%axpby(m,gamma,y,delta,info)
end if
call z%psb_z_base_vect_type%abgdxyz(m,alpha,beta,gamma,delta,x,y,info)
!!$
!!$ if (x%is_dev()) call x%sync()
!!$
!!$ call y%axpby(m,alpha,x,beta,info)
!!$ call z%axpby(m,gamma,y,delta,info)
end subroutine z_cuda_abgdxyz
@ -1512,7 +1556,7 @@ contains
!!$ complex(psb_dpk_), external :: ddot
!!$ integer(psb_ipk_) :: info
!!$
!!$ res = dzero
!!$ res = zzero
!!$ !
!!$ ! Note: this is the gpu implementation.
!!$ ! When we get here, we are sure that X is of
@ -1566,13 +1610,13 @@ contains
!!$
!!$ select type(xx => x)
!!$ type is (psb_z_base_multivect_type)
!!$ if ((beta /= dzero).and.(y%is_dev()))&
!!$ if ((beta /= zzero).and.(y%is_dev()))&
!!$ & call y%sync()
!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info)
!!$ call y%set_host()
!!$ type is (psb_z_multivect_cuda)
!!$ ! Do something different here
!!$ if ((beta /= dzero).and.y%is_host())&
!!$ if ((beta /= zzero).and.y%is_host())&
!!$ & call y%sync()
!!$ if (xx%is_host()) call xx%sync()
!!$ nx = getMultiVecDeviceSize(xx%deviceVect)
@ -1817,7 +1861,7 @@ contains
implicit none
class(psb_z_multivect_cuda), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) x%v=zzero
call x%set_host()
end subroutine z_cuda_multi_zero

Loading…
Cancel
Save