diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index fca1c616..9b3b6fb1 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_c_vect_cuda), intent(inout) :: z complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.yy%is_host())& + & call yy%sync() + if ((delta /= czero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_c_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= czero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_c_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= czero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_c_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=czero call x%set_host() end subroutine c_cuda_multi_zero diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 index 2220b26c..c98d66f6 100644 --- a/cuda/psb_d_cuda_vect_mod.F90 +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_d_vect_cuda), intent(inout) :: z real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_i_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= izero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_i_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= izero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1477,7 +1477,7 @@ contains implicit none class(psb_i_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=izero call x%set_host() end subroutine i_cuda_multi_zero diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 index 80c60bc3..55ed4a7d 100644 --- a/cuda/psb_s_cuda_vect_mod.F90 +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_s_vect_cuda), intent(inout) :: z real(psb_spk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.yy%is_host())& + & call yy%sync() + if ((delta /= szero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_s_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= szero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_s_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= szero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_s_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=szero call x%set_host() end subroutine s_cuda_multi_zero diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 index 9f801742..2114723b 100644 --- a/cuda/psb_z_cuda_vect_mod.F90 +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -922,13 +922,57 @@ contains class(psb_z_vect_cuda), intent(inout) :: z complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz + logical :: gpu_done + + info = psb_success_ + + if (.true.) then + gpu_done = .false. + select type(xx => x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= zzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) !!$ type is (psb_z_base_multivect_type) -!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ if ((beta /= zzero).and.(y%is_dev()))& !!$ & call y%sync() !!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) !!$ call y%set_host() !!$ type is (psb_z_multivect_cuda) !!$ ! Do something different here -!!$ if ((beta /= dzero).and.y%is_host())& +!!$ if ((beta /= zzero).and.y%is_host())& !!$ & call y%sync() !!$ if (xx%is_host()) call xx%sync() !!$ nx = getMultiVecDeviceSize(xx%deviceVect) @@ -1817,7 +1861,7 @@ contains implicit none class(psb_z_multivect_cuda), intent(inout) :: x - if (allocated(x%v)) x%v=dzero + if (allocated(x%v)) x%v=zzero call x%set_host() end subroutine z_cuda_multi_zero