Added CUDA version of XYZW

nond-rep
sfilippone 10 months ago
parent 86be8ebcd0
commit a11f328e62

@ -273,6 +273,28 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex
return(i); return(i);
} }
/*
 * xyzwMultiVecDeviceFloatComplex
 *
 * Host-side wrapper that forwards a fused four-vector update to spgpuCxyzw,
 * operating on the device buffers (v_) of X, Y, Z and W with the handle
 * returned by psb_cudaGetHandle().
 *
 * NOTE(review): the kernel presumably computes
 *     y = a*x + b*y;  z = c*y + d*z;  w = e*z + f*w
 * -- confirm against the spgpuCxyzw implementation.
 *
 *   n              number of leading entries to process in each vector
 *   a,b,c,d,e,f    scalar coefficients, passed through unchanged
 *   devMultiVecX/Y/Z/W
 *                  opaque pointers to struct MultiVectDevice
 *
 * Returns SPGPU_UNSUPPORTED when n exceeds the size_ of any of the four
 * vectors (nothing is launched in that case), 0 otherwise.
 */
int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b,
                                   cuFloatComplex c, cuFloatComplex d,
                                   cuFloatComplex e, cuFloatComplex f,
                                   void* devMultiVecX, void* devMultiVecY,
                                   void* devMultiVecZ, void* devMultiVecW)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
  struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
  spgpuHandle_t handle = psb_cudaGetHandle();

  /* All four buffers are handed to the kernel, so all four must hold at
   * least n entries; checking only X and Y would let the kernel run past
   * the end of a shorter Z or W. */
  if ((n > devVecX->size_) || (n > devVecY->size_) ||
      (n > devVecZ->size_) || (n > devVecW->size_))
    return SPGPU_UNSUPPORTED;

  spgpuCxyzw(handle, n, a, b, c, d, e, f,
             (cuFloatComplex *) devVecX->v_, (cuFloatComplex *) devVecY->v_,
             (cuFloatComplex *) devVecZ->v_, (cuFloatComplex *) devVecW->v_);
  return 0;
}
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha,
void *deviceVecA, void *deviceVecB) void *deviceVecA, void *deviceVecB)
{ int i = 0; { int i = 0;

@ -72,6 +72,11 @@ int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX,
int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta,
cuFloatComplex gamma, cuFloatComplex delta, cuFloatComplex gamma, cuFloatComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
/* Single-precision complex XYZW wrapper: n entries, six scalar coefficients,
 * and four opaque multivector handles (X, Y, Z, W). Returns an int status. */
int xyzwMultiVecDeviceFloatComplex(int n,
                                   cuFloatComplex a,
                                   cuFloatComplex b,
                                   cuFloatComplex c,
                                   cuFloatComplex d,
                                   cuFloatComplex e,
                                   cuFloatComplex f,
                                   void* devMultiVecX,
                                   void* devMultiVecY,
                                   void* devMultiVecZ,
                                   void* devMultiVecW);
int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA,
void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); void *deviceVecB, cuFloatComplex beta, void *deviceVecZ);

@ -258,6 +258,25 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do
return(i); return(i);
} }
/*
 * xyzwMultiVecDeviceDouble
 *
 * Host-side wrapper that forwards a fused four-vector update to spgpuDxyzw,
 * operating on the device buffers (v_) of X, Y, Z and W with the handle
 * returned by psb_cudaGetHandle().
 *
 * NOTE(review): the kernel presumably computes
 *     y = a*x + b*y;  z = c*y + d*z;  w = e*z + f*w
 * -- confirm against the spgpuDxyzw implementation.
 *
 *   n              number of leading entries to process in each vector
 *   a,b,c,d,e,f    scalar coefficients, passed through unchanged
 *   devMultiVecX/Y/Z/W
 *                  opaque pointers to struct MultiVectDevice
 *
 * Returns SPGPU_UNSUPPORTED when n exceeds the size_ of any of the four
 * vectors (nothing is launched in that case), 0 otherwise.
 */
int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f,
                             void* devMultiVecX, void* devMultiVecY,
                             void* devMultiVecZ, void* devMultiVecW)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
  struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
  spgpuHandle_t handle = psb_cudaGetHandle();

  /* All four buffers are handed to the kernel, so all four must hold at
   * least n entries; checking only X and Y would let the kernel run past
   * the end of a shorter Z or W. */
  if ((n > devVecX->size_) || (n > devVecY->size_) ||
      (n > devVecZ->size_) || (n > devVecW->size_))
    return SPGPU_UNSUPPORTED;

  spgpuDxyzw(handle, n, a, b, c, d, e, f,
             (double *) devVecX->v_, (double *) devVecY->v_,
             (double *) devVecZ->v_, (double *) devVecW->v_);
  return 0;
}
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;

@ -69,6 +69,9 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
/* Double-precision XYZW wrapper: n entries, six scalar coefficients, and
 * four opaque multivector handles (X, Y, Z, W). Returns an int status. */
int xyzwMultiVecDeviceDouble(int n,
                             double a,
                             double b,
                             double c,
                             double d,
                             double e,
                             double f,
                             void* devMultiVecX,
                             void* devMultiVecY,
                             void* devMultiVecZ,
                             void* devMultiVecW);
int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA,
void *deviceVecB, double beta, void *deviceVecZ); void *deviceVecB, double beta, void *deviceVecZ);

@ -914,7 +914,6 @@ contains
end subroutine c_cuda_axpby_v end subroutine c_cuda_axpby_v
subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none
@ -975,9 +974,70 @@ contains
call z%axpby(m,gamma,y,delta,info) call z%axpby(m,gamma,y,delta,info)
end if end if
end subroutine c_cuda_abgdxyz end subroutine c_cuda_abgdxyz
!
! c_cuda_xyzw: fused update of y, z and w (single-precision complex).
!
! When x, y and z are all psb_c_vect_cuda the work is delegated to
! xyzwMultiVecDevice on the device vectors; otherwise the host fallback
! performs, in order,
!     call y%axpby(m,a,x,b,info)
!     call z%axpby(m,c,y,d,info)
!     call w%axpby(m,e,z,f,info)
! NOTE(review): presumably y = a*x + b*y, z = c*y + d*z, w = e*z + f*w;
! confirm against the axpby contract.
!
! Arguments
!   m            number of entries to process
!   a,b,c,d,e,f  coefficients; all of them must be nonzero
!   x            first operand (only read here, apart from sync)
!   y, z, w      vectors updated by the operation
!   info         psb_success_ on success; psb_err_internal_error_ when a
!                coefficient is zero or a device vector holds fewer than
!                m entries; otherwise the code returned by the device
!                routine or by the first failing axpby
!
subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
  use psi_serial_mod
  implicit none
  integer(psb_ipk_), intent(in)              :: m
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  class(psb_c_base_vect_type), intent(inout) :: z
  class(psb_c_vect_cuda), intent(inout)      :: w
  complex(psb_spk_), intent (in)             :: a,b,c,d,e,f
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_) :: nx, ny, nz, nw
  logical :: gpu_done

  info = psb_success_
  gpu_done = .false.
  if ((a==czero).or.(b==czero).or. &
       & (c==czero).or.(d==czero).or.&
       & (e==czero).or.(f==czero)) then
    write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
    ! Nothing has been computed: report it instead of returning success
    ! with y, z and w left untouched.
    info = psb_err_internal_error_
  else
    select type(xx => x)
    class is (psb_c_vect_cuda)
      select type(yy => y)
      class is (psb_c_vect_cuda)
        select type(zz => z)
        class is (psb_c_vect_cuda)
          ! All operands are CUDA vectors: run on the device.
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          if (zz%is_host()) call zz%sync()
          if (w%is_host()) call w%sync()
          nx = getMultiVecDeviceSize(xx%deviceVect)
          ny = getMultiVecDeviceSize(yy%deviceVect)
          nz = getMultiVecDeviceSize(zz%deviceVect)
          nw = getMultiVecDeviceSize(w%deviceVect)
          if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
            info = psb_err_internal_error_
          else
            info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
                 & xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
          end if
          call yy%set_dev()
          call zz%set_dev()
          call w%set_dev()
          gpu_done = .true.
        end select
      end select
    end select
    if (.not.gpu_done) then
      if (x%is_host()) call x%sync()
      if (y%is_host()) call y%sync()
      if (z%is_host()) call z%sync()
      if (w%is_host()) call w%sync()
      ! Each step consumes the result of the previous one; stop at the
      ! first failure so its error code is not overwritten by a later call.
      call y%axpby(m,a,x,b,info)
      if (info == psb_success_) call z%axpby(m,c,y,d,info)
      if (info == psb_success_) call w%axpby(m,e,z,f,info)
    end if
  end if
end subroutine c_cuda_xyzw
subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info) subroutine c_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod use psi_serial_mod

@ -325,6 +325,18 @@ module psb_c_vectordev_mod
end function abgdxyzMultiVecDeviceFloatComplex end function abgdxyzMultiVecDeviceFloatComplex
end interface end interface
! Generic name bound to the C routine xyzwMultiVecDeviceFloatComplex.
! All arguments are passed by value; the result is the C int status.
interface xyzwMultiVecDevice
  function xyzwMultiVecDeviceFloatComplex(n, a, b, c, d, e, f, &
       & deviceVecX, deviceVecY, deviceVecZ, deviceVecW)       &
       & result(res) bind(c,name='xyzwMultiVecDeviceFloatComplex')
    use iso_c_binding
    integer(c_int), value           :: n
    complex(c_float_complex), value :: a, b, c, d, e, f
    type(c_ptr), value              :: deviceVecX, deviceVecY
    type(c_ptr), value              :: deviceVecZ, deviceVecW
    integer(c_int)                  :: res
  end function xyzwMultiVecDeviceFloatComplex
end interface
interface axyMultiVecDevice interface axyMultiVecDevice
function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) & function axyMultiVecDeviceFloatComplex(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceFloatComplex') & result(res) bind(c,name='axyMultiVecDeviceFloatComplex')

@ -914,7 +914,6 @@ contains
end subroutine d_cuda_axpby_v end subroutine d_cuda_axpby_v
subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none
@ -975,9 +974,70 @@ contains
call z%axpby(m,gamma,y,delta,info) call z%axpby(m,gamma,y,delta,info)
end if end if
end subroutine d_cuda_abgdxyz end subroutine d_cuda_abgdxyz
!
! d_cuda_xyzw: fused update of y, z and w (double-precision real).
!
! When x, y and z are all psb_d_vect_cuda the work is delegated to
! xyzwMultiVecDevice on the device vectors; otherwise the host fallback
! performs, in order,
!     call y%axpby(m,a,x,b,info)
!     call z%axpby(m,c,y,d,info)
!     call w%axpby(m,e,z,f,info)
! NOTE(review): presumably y = a*x + b*y, z = c*y + d*z, w = e*z + f*w;
! confirm against the axpby contract.
!
! Arguments
!   m            number of entries to process
!   a,b,c,d,e,f  coefficients; all of them must be nonzero
!   x            first operand (only read here, apart from sync)
!   y, z, w      vectors updated by the operation
!   info         psb_success_ on success; psb_err_internal_error_ when a
!                coefficient is zero or a device vector holds fewer than
!                m entries; otherwise the code returned by the device
!                routine or by the first failing axpby
!
subroutine d_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
  use psi_serial_mod
  implicit none
  integer(psb_ipk_), intent(in)              :: m
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_base_vect_type), intent(inout) :: y
  class(psb_d_base_vect_type), intent(inout) :: z
  class(psb_d_vect_cuda), intent(inout)      :: w
  real(psb_dpk_), intent (in)                :: a,b,c,d,e,f
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_) :: nx, ny, nz, nw
  logical :: gpu_done

  info = psb_success_
  gpu_done = .false.
  if ((a==dzero).or.(b==dzero).or. &
       & (c==dzero).or.(d==dzero).or.&
       & (e==dzero).or.(f==dzero)) then
    write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
    ! Nothing has been computed: report it instead of returning success
    ! with y, z and w left untouched.
    info = psb_err_internal_error_
  else
    select type(xx => x)
    class is (psb_d_vect_cuda)
      select type(yy => y)
      class is (psb_d_vect_cuda)
        select type(zz => z)
        class is (psb_d_vect_cuda)
          ! All operands are CUDA vectors: run on the device.
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          if (zz%is_host()) call zz%sync()
          if (w%is_host()) call w%sync()
          nx = getMultiVecDeviceSize(xx%deviceVect)
          ny = getMultiVecDeviceSize(yy%deviceVect)
          nz = getMultiVecDeviceSize(zz%deviceVect)
          nw = getMultiVecDeviceSize(w%deviceVect)
          if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
            info = psb_err_internal_error_
          else
            info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
                 & xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
          end if
          call yy%set_dev()
          call zz%set_dev()
          call w%set_dev()
          gpu_done = .true.
        end select
      end select
    end select
    if (.not.gpu_done) then
      if (x%is_host()) call x%sync()
      if (y%is_host()) call y%sync()
      if (z%is_host()) call z%sync()
      if (w%is_host()) call w%sync()
      ! Each step consumes the result of the previous one; stop at the
      ! first failure so its error code is not overwritten by a later call.
      call y%axpby(m,a,x,b,info)
      if (info == psb_success_) call z%axpby(m,c,y,d,info)
      if (info == psb_success_) call w%axpby(m,e,z,f,info)
    end if
  end if
end subroutine d_cuda_xyzw
subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info) subroutine d_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod use psi_serial_mod

@ -325,6 +325,18 @@ module psb_d_vectordev_mod
end function abgdxyzMultiVecDeviceDouble end function abgdxyzMultiVecDeviceDouble
end interface end interface
! Generic name bound to the C routine xyzwMultiVecDeviceDouble.
! All arguments are passed by value; the result is the C int status.
interface xyzwMultiVecDevice
  function xyzwMultiVecDeviceDouble(n, a, b, c, d, e, f,  &
       & deviceVecX, deviceVecY, deviceVecZ, deviceVecW)  &
       & result(res) bind(c,name='xyzwMultiVecDeviceDouble')
    use iso_c_binding
    integer(c_int), value :: n
    real(c_double), value :: a, b, c, d, e, f
    type(c_ptr), value    :: deviceVecX, deviceVecY
    type(c_ptr), value    :: deviceVecZ, deviceVecW
    integer(c_int)        :: res
  end function xyzwMultiVecDeviceDouble
end interface
interface axyMultiVecDevice interface axyMultiVecDevice
function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) & function axyMultiVecDeviceDouble(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceDouble') & result(res) bind(c,name='axyMultiVecDeviceDouble')

@ -914,7 +914,6 @@ contains
end subroutine s_cuda_axpby_v end subroutine s_cuda_axpby_v
subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none
@ -975,9 +974,70 @@ contains
call z%axpby(m,gamma,y,delta,info) call z%axpby(m,gamma,y,delta,info)
end if end if
end subroutine s_cuda_abgdxyz end subroutine s_cuda_abgdxyz
!
! s_cuda_xyzw: fused update of y, z and w (single-precision real).
!
! When x, y and z are all psb_s_vect_cuda the work is delegated to
! xyzwMultiVecDevice on the device vectors; otherwise the host fallback
! performs, in order,
!     call y%axpby(m,a,x,b,info)
!     call z%axpby(m,c,y,d,info)
!     call w%axpby(m,e,z,f,info)
! NOTE(review): presumably y = a*x + b*y, z = c*y + d*z, w = e*z + f*w;
! confirm against the axpby contract.
!
! Arguments
!   m            number of entries to process
!   a,b,c,d,e,f  coefficients; all of them must be nonzero
!   x            first operand (only read here, apart from sync)
!   y, z, w      vectors updated by the operation
!   info         psb_success_ on success; psb_err_internal_error_ when a
!                coefficient is zero or a device vector holds fewer than
!                m entries; otherwise the code returned by the device
!                routine or by the first failing axpby
!
subroutine s_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
  use psi_serial_mod
  implicit none
  integer(psb_ipk_), intent(in)              :: m
  class(psb_s_base_vect_type), intent(inout) :: x
  class(psb_s_base_vect_type), intent(inout) :: y
  class(psb_s_base_vect_type), intent(inout) :: z
  class(psb_s_vect_cuda), intent(inout)      :: w
  real(psb_spk_), intent (in)                :: a,b,c,d,e,f
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_) :: nx, ny, nz, nw
  logical :: gpu_done

  info = psb_success_
  gpu_done = .false.
  if ((a==szero).or.(b==szero).or. &
       & (c==szero).or.(d==szero).or.&
       & (e==szero).or.(f==szero)) then
    write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
    ! Nothing has been computed: report it instead of returning success
    ! with y, z and w left untouched.
    info = psb_err_internal_error_
  else
    select type(xx => x)
    class is (psb_s_vect_cuda)
      select type(yy => y)
      class is (psb_s_vect_cuda)
        select type(zz => z)
        class is (psb_s_vect_cuda)
          ! All operands are CUDA vectors: run on the device.
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          if (zz%is_host()) call zz%sync()
          if (w%is_host()) call w%sync()
          nx = getMultiVecDeviceSize(xx%deviceVect)
          ny = getMultiVecDeviceSize(yy%deviceVect)
          nz = getMultiVecDeviceSize(zz%deviceVect)
          nw = getMultiVecDeviceSize(w%deviceVect)
          if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
            info = psb_err_internal_error_
          else
            info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
                 & xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
          end if
          call yy%set_dev()
          call zz%set_dev()
          call w%set_dev()
          gpu_done = .true.
        end select
      end select
    end select
    if (.not.gpu_done) then
      if (x%is_host()) call x%sync()
      if (y%is_host()) call y%sync()
      if (z%is_host()) call z%sync()
      if (w%is_host()) call w%sync()
      ! Each step consumes the result of the previous one; stop at the
      ! first failure so its error code is not overwritten by a later call.
      call y%axpby(m,a,x,b,info)
      if (info == psb_success_) call z%axpby(m,c,y,d,info)
      if (info == psb_success_) call w%axpby(m,e,z,f,info)
    end if
  end if
end subroutine s_cuda_xyzw
subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info) subroutine s_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod use psi_serial_mod

@ -325,6 +325,18 @@ module psb_s_vectordev_mod
end function abgdxyzMultiVecDeviceFloat end function abgdxyzMultiVecDeviceFloat
end interface end interface
! Generic name bound to the C routine xyzwMultiVecDeviceFloat.
! All arguments are passed by value; the result is the C int status.
interface xyzwMultiVecDevice
  function xyzwMultiVecDeviceFloat(n, a, b, c, d, e, f,   &
       & deviceVecX, deviceVecY, deviceVecZ, deviceVecW)  &
       & result(res) bind(c,name='xyzwMultiVecDeviceFloat')
    use iso_c_binding
    integer(c_int), value :: n
    real(c_float), value  :: a, b, c, d, e, f
    type(c_ptr), value    :: deviceVecX, deviceVecY
    type(c_ptr), value    :: deviceVecZ, deviceVecW
    integer(c_int)        :: res
  end function xyzwMultiVecDeviceFloat
end interface
interface axyMultiVecDevice interface axyMultiVecDevice
function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) & function axyMultiVecDeviceFloat(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceFloat') & result(res) bind(c,name='axyMultiVecDeviceFloat')

@ -914,7 +914,6 @@ contains
end subroutine z_cuda_axpby_v end subroutine z_cuda_axpby_v
subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none
@ -975,9 +974,70 @@ contains
call z%axpby(m,gamma,y,delta,info) call z%axpby(m,gamma,y,delta,info)
end if end if
end subroutine z_cuda_abgdxyz end subroutine z_cuda_abgdxyz
!
! z_cuda_xyzw: fused update of y, z and w (double-precision complex).
!
! When x, y and z are all psb_z_vect_cuda the work is delegated to
! xyzwMultiVecDevice on the device vectors; otherwise the host fallback
! performs, in order,
!     call y%axpby(m,a,x,b,info)
!     call z%axpby(m,c,y,d,info)
!     call w%axpby(m,e,z,f,info)
! NOTE(review): presumably y = a*x + b*y, z = c*y + d*z, w = e*z + f*w;
! confirm against the axpby contract.
!
! Arguments
!   m            number of entries to process
!   a,b,c,d,e,f  coefficients; all of them must be nonzero
!   x            first operand (only read here, apart from sync)
!   y, z, w      vectors updated by the operation
!   info         psb_success_ on success; psb_err_internal_error_ when a
!                coefficient is zero or a device vector holds fewer than
!                m entries; otherwise the code returned by the device
!                routine or by the first failing axpby
!
subroutine z_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
  use psi_serial_mod
  implicit none
  integer(psb_ipk_), intent(in)              :: m
  class(psb_z_base_vect_type), intent(inout) :: x
  class(psb_z_base_vect_type), intent(inout) :: y
  class(psb_z_base_vect_type), intent(inout) :: z
  class(psb_z_vect_cuda), intent(inout)      :: w
  complex(psb_dpk_), intent (in)             :: a,b,c,d,e,f
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_) :: nx, ny, nz, nw
  logical :: gpu_done

  info = psb_success_
  gpu_done = .false.
  if ((a==zzero).or.(b==zzero).or. &
       & (c==zzero).or.(d==zzero).or.&
       & (e==zzero).or.(f==zzero)) then
    write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
    ! Nothing has been computed: report it instead of returning success
    ! with y, z and w left untouched.
    info = psb_err_internal_error_
  else
    select type(xx => x)
    class is (psb_z_vect_cuda)
      select type(yy => y)
      class is (psb_z_vect_cuda)
        select type(zz => z)
        class is (psb_z_vect_cuda)
          ! All operands are CUDA vectors: run on the device.
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          if (zz%is_host()) call zz%sync()
          if (w%is_host()) call w%sync()
          nx = getMultiVecDeviceSize(xx%deviceVect)
          ny = getMultiVecDeviceSize(yy%deviceVect)
          nz = getMultiVecDeviceSize(zz%deviceVect)
          nw = getMultiVecDeviceSize(w%deviceVect)
          if ((nx<m).or.(ny<m).or.(nz<m).or.(nw<m)) then
            info = psb_err_internal_error_
          else
            info = xyzwMultiVecDevice(m,a,b,c,d,e,f,&
                 & xx%deviceVect,yy%deviceVect,zz%deviceVect,w%deviceVect)
          end if
          call yy%set_dev()
          call zz%set_dev()
          call w%set_dev()
          gpu_done = .true.
        end select
      end select
    end select
    if (.not.gpu_done) then
      if (x%is_host()) call x%sync()
      if (y%is_host()) call y%sync()
      if (z%is_host()) call z%sync()
      if (w%is_host()) call w%sync()
      ! Each step consumes the result of the previous one; stop at the
      ! first failure so its error code is not overwritten by a later call.
      call y%axpby(m,a,x,b,info)
      if (info == psb_success_) call z%axpby(m,c,y,d,info)
      if (info == psb_success_) call w%axpby(m,e,z,f,info)
    end if
  end if
end subroutine z_cuda_xyzw
subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info) subroutine z_cuda_axpby_a(m,alpha, x, beta, y, info)
use psi_serial_mod use psi_serial_mod

@ -325,6 +325,18 @@ module psb_z_vectordev_mod
end function abgdxyzMultiVecDeviceDoubleComplex end function abgdxyzMultiVecDeviceDoubleComplex
end interface end interface
! Generic name bound to the C routine xyzwMultiVecDeviceDoubleComplex.
! All arguments are passed by value; the result is the C int status.
interface xyzwMultiVecDevice
  function xyzwMultiVecDeviceDoubleComplex(n, a, b, c, d, e, f, &
       & deviceVecX, deviceVecY, deviceVecZ, deviceVecW)        &
       & result(res) bind(c,name='xyzwMultiVecDeviceDoubleComplex')
    use iso_c_binding
    integer(c_int), value            :: n
    complex(c_double_complex), value :: a, b, c, d, e, f
    type(c_ptr), value               :: deviceVecX, deviceVecY
    type(c_ptr), value               :: deviceVecZ, deviceVecW
    integer(c_int)                   :: res
  end function xyzwMultiVecDeviceDoubleComplex
end interface
interface axyMultiVecDevice interface axyMultiVecDevice
function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) & function axyMultiVecDeviceDoubleComplex(n,alpha,deviceVecA,deviceVecB) &
& result(res) bind(c,name='axyMultiVecDeviceDoubleComplex') & result(res) bind(c,name='axyMultiVecDeviceDoubleComplex')

@ -19,7 +19,8 @@ OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \
hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \
hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\ saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \
sxyzw.o cxyzw.o dxyzw.o zxyzw.o
objs: $(OBJS) objs: $(OBJS)
lib: objs lib: objs

@ -192,6 +192,17 @@ void spgpuSabgdxyz(spgpuHandle_t handle,
__device float *y, __device float *y,
__device float *z) __device float *z)
; ;
/**
 * \fn void spgpuSxyzw(spgpuHandle_t handle, int n, float a, float b, float c, float d, float e, float f, __device float* x, __device float *y, __device float *z, __device float *w)
 * Single-precision XYZW routine: takes six scalar coefficients and four
 * device vectors of (at least) n entries.
 * NOTE(review): presumably y = a*x + b*y; z = c*y + d*z; w = e*z + f*w --
 * confirm against the kernel implementation.
 */
void spgpuSxyzw(spgpuHandle_t handle, int n,
                float a, float b, float c, float d, float e, float f,
                __device float* x, __device float *y,
                __device float *z, __device float *w);
/** /**
* \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch) * \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch)
@ -487,6 +498,16 @@ void spgpuDabgdxyz(spgpuHandle_t handle,
__device double *z) __device double *z)
; ;
/**
 * \fn void spgpuDxyzw(spgpuHandle_t handle, int n, double a, double b, double c, double d, double e, double f, __device double* x, __device double *y, __device double *z, __device double *w)
 * Double-precision XYZW routine: takes six scalar coefficients and four
 * device vectors of (at least) n entries.
 * NOTE(review): presumably y = a*x + b*y; z = c*y + d*z; w = e*z + f*w --
 * confirm against the kernel implementation.
 */
void spgpuDxyzw(spgpuHandle_t handle, int n,
                double a, double b, double c, double d, double e, double f,
                __device double* x, __device double *y,
                __device double *z, __device double *w);
/** /**
* \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch) * \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch)
@ -778,6 +799,18 @@ void spgpuCabgdxyz(spgpuHandle_t handle,
__device cuFloatComplex *y, __device cuFloatComplex *y,
__device cuFloatComplex *z) __device cuFloatComplex *z)
; ;
/**
 * \fn void spgpuCxyzw(spgpuHandle_t handle, int n, cuFloatComplex a, cuFloatComplex b, cuFloatComplex c, cuFloatComplex d, cuFloatComplex e, cuFloatComplex f, __device cuFloatComplex* x, __device cuFloatComplex *y, __device cuFloatComplex *z, __device cuFloatComplex *w)
 * Single-precision complex XYZW routine: takes six scalar coefficients and
 * four device vectors of (at least) n entries.
 * NOTE(review): presumably y = a*x + b*y; z = c*y + d*z; w = e*z + f*w --
 * confirm against the kernel implementation.
 */
void spgpuCxyzw(spgpuHandle_t handle, int n,
                cuFloatComplex a, cuFloatComplex b,
                cuFloatComplex c, cuFloatComplex d,
                cuFloatComplex e, cuFloatComplex f,
                __device cuFloatComplex* x, __device cuFloatComplex *y,
                __device cuFloatComplex *z, __device cuFloatComplex *w);
/** /**
* \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch) * \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch)
@ -1069,6 +1102,19 @@ void spgpuZabgdxyz(spgpuHandle_t handle,
__device cuDoubleComplex *y, __device cuDoubleComplex *y,
__device cuDoubleComplex *z) __device cuDoubleComplex *z)
; ;
/**
 * \fn void spgpuZxyzw(spgpuHandle_t handle, int n, cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c, cuDoubleComplex d, cuDoubleComplex e, cuDoubleComplex f, __device cuDoubleComplex* x, __device cuDoubleComplex *y, __device cuDoubleComplex *z, __device cuDoubleComplex *w)
 * Double-precision complex XYZW routine: takes six scalar coefficients and
 * four device vectors of (at least) n entries.
 * NOTE(review): presumably y = a*x + b*y; z = c*y + d*z; w = e*z + f*w --
 * confirm against the kernel implementation.
 */
void spgpuZxyzw(spgpuHandle_t handle, int n,
                cuDoubleComplex a, cuDoubleComplex b,
                cuDoubleComplex c, cuDoubleComplex d,
                cuDoubleComplex e, cuDoubleComplex f,
                __device cuDoubleComplex* x, __device cuDoubleComplex *y,
                __device cuDoubleComplex *z, __device cuDoubleComplex *w);
/** /**
* \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch) * \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch)

@ -258,6 +258,27 @@ int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float
return(i); return(i);
} }
/*
 * xyzwMultiVecDeviceFloat
 *
 * Host-side wrapper that forwards a fused four-vector update to spgpuSxyzw,
 * operating on the device buffers (v_) of X, Y, Z and W with the handle
 * returned by psb_cudaGetHandle().
 *
 * NOTE(review): the kernel presumably computes
 *     y = a*x + b*y;  z = c*y + d*z;  w = e*z + f*w
 * -- confirm against the spgpuSxyzw implementation.
 *
 *   n              number of leading entries to process in each vector
 *   a,b,c,d,e,f    scalar coefficients, passed through unchanged
 *   devMultiVecX/Y/Z/W
 *                  opaque pointers to struct MultiVectDevice
 *
 * Returns SPGPU_UNSUPPORTED when n exceeds the size_ of any of the four
 * vectors (nothing is launched in that case), 0 otherwise.
 */
int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f,
                            void* devMultiVecX, void* devMultiVecY,
                            void* devMultiVecZ, void* devMultiVecW)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
  struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
  spgpuHandle_t handle = psb_cudaGetHandle();

  /* All four buffers are handed to the kernel, so all four must hold at
   * least n entries; checking only X and Y would let the kernel run past
   * the end of a shorter Z or W. */
  if ((n > devVecX->size_) || (n > devVecY->size_) ||
      (n > devVecZ->size_) || (n > devVecW->size_))
    return SPGPU_UNSUPPORTED;

  spgpuSxyzw(handle, n, a, b, c, d, e, f,
             (float *) devVecX->v_, (float *) devVecY->v_,
             (float *) devVecZ->v_, (float *) devVecW->v_);
  return 0;
}
int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB)
{ int i = 0; { int i = 0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA;

@ -69,6 +69,9 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY);
int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
/* Single-precision XYZW wrapper: n entries, six scalar coefficients, and
 * four opaque multivector handles (X, Y, Z, W). Returns an int status. */
int xyzwMultiVecDeviceFloat(int n,
                            float a,
                            float b,
                            float c,
                            float d,
                            float e,
                            float f,
                            void* devMultiVecX,
                            void* devMultiVecY,
                            void* devMultiVecZ,
                            void* devMultiVecW);
int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA,
void *deviceVecB, float beta, void *deviceVecZ); void *deviceVecB, float beta, void *deviceVecZ);

@ -251,7 +251,29 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
(cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_);
return(i); return(i);
} }
/*
 * xyzwMultiVecDeviceDoubleComplex
 *
 * Host-side wrapper that forwards a fused four-vector update to spgpuZxyzw,
 * operating on the device buffers (v_) of X, Y, Z and W with the handle
 * returned by psb_cudaGetHandle().
 *
 * NOTE(review): the kernel presumably computes
 *     y = a*x + b*y;  z = c*y + d*z;  w = e*z + f*w
 * -- confirm against the spgpuZxyzw implementation.
 *
 *   n              number of leading entries to process in each vector
 *   a,b,c,d,e,f    scalar coefficients, passed through unchanged
 *   devMultiVecX/Y/Z/W
 *                  opaque pointers to struct MultiVectDevice
 *
 * Returns SPGPU_UNSUPPORTED when n exceeds the size_ of any of the four
 * vectors (nothing is launched in that case), 0 otherwise.
 */
int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b,
                                    cuDoubleComplex c, cuDoubleComplex d,
                                    cuDoubleComplex e, cuDoubleComplex f,
                                    void* devMultiVecX, void* devMultiVecY,
                                    void* devMultiVecZ, void* devMultiVecW)
{
  struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX;
  struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY;
  struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ;
  struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW;
  spgpuHandle_t handle = psb_cudaGetHandle();

  /* All four buffers are handed to the kernel, so all four must hold at
   * least n entries; checking only X and Y would let the kernel run past
   * the end of a shorter Z or W. */
  if ((n > devVecX->size_) || (n > devVecY->size_) ||
      (n > devVecZ->size_) || (n > devVecW->size_))
    return SPGPU_UNSUPPORTED;

  spgpuZxyzw(handle, n, a, b, c, d, e, f,
             (cuDoubleComplex *) devVecX->v_, (cuDoubleComplex *) devVecY->v_,
             (cuDoubleComplex *) devVecZ->v_, (cuDoubleComplex *) devVecW->v_);
  return 0;
}
int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX,
cuDoubleComplex beta, void* devMultiVecY) cuDoubleComplex beta, void* devMultiVecY)
{ int j=0, i=0; { int j=0, i=0;

@ -80,6 +80,11 @@ int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX
int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha,
cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta,
void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
/* Double-precision complex XYZW wrapper: n entries, six scalar coefficients,
 * and four opaque multivector handles (X, Y, Z, W). Returns an int status. */
int xyzwMultiVecDeviceDoubleComplex(int n,
                                    cuDoubleComplex a,
                                    cuDoubleComplex b,
                                    cuDoubleComplex c,
                                    cuDoubleComplex d,
                                    cuDoubleComplex e,
                                    cuDoubleComplex f,
                                    void* devMultiVecX,
                                    void* devMultiVecY,
                                    void* devMultiVecZ,
                                    void* devMultiVecW);
int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha,
void *deviceVecA, void *deviceVecB); void *deviceVecA, void *deviceVecB);
int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA,

Loading…
Cancel
Save