Track CUDA allocation

repack-track
sfilippone 2 years ago
parent e0a4d362fa
commit ef82b975e3

@ -37,14 +37,14 @@ static int hasUVA=-1;
static struct cudaDeviceProp *prop=NULL;
static spgpuHandle_t psb_cuda_handle = NULL;
static cublasHandle_t psb_cublas_handle = NULL;
#if defined(TRACK_CUDA_MALLOC)
#if 0&& defined(TRACK_CUDA_MALLOC)
static long long total_cuda_mem = 0;
#endif
int allocRemoteBuffer(void** buffer, int count)
{
cudaError_t err = cudaMalloc(buffer, count);
#if defined(TRACK_CUDA_MALLOC)
#if 0&& defined(TRACK_CUDA_MALLOC)
total_cuda_mem += count;
fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n",
count, total_cuda_mem, *buffer);
@ -205,6 +205,21 @@ int readRemoteBuffer(void* hostDest, void* buffer, int count)
return SPGPU_UNSPECIFIED;
}
}
#if 0&& defined(TRACK_CUDA_MALLOC)
int freeAndTrackRemoteBuffer(void* buffer,int size)
{
cudaError_t err = cudaFree(buffer);
total_cuda_mem -= size;
fprintf(stderr,"Tracking CUDA free for %ld bytes total %ld address %p\n",
size, total_cuda_mem, buffer);
if (err == cudaSuccess)
return SPGPU_SUCCESS;
else {
fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer);
return SPGPU_UNSPECIFIED;
}
}
#endif
int freeRemoteBuffer(void* buffer)
{

@ -51,6 +51,9 @@ int unregisterMappedMemory(void *buffer);
int writeRemoteBuffer(void* hostSrc, void* buffer, int count);
int readRemoteBuffer(void* hostDest, void* buffer, int count);
int freeRemoteBuffer(void* buffer);
#if 0&&defined(TRACK_CUDA_MALLOC)
int freeAndTrackRemoteBuffer(void* buffer,int size);
#endif
int gpuInit(int dev);
int getDeviceCount();
int getDevice();

@ -72,7 +72,7 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice));
int ret=SPGPU_SUCCESS;
*remoteMatrix = (void *)tmp;
fprintf(stderr,"Allocated HllDevice %p\n",tmp);
tmp->hackSize = params->hackSize;
tmp->allocsize = params->allocsize;
@ -131,13 +131,16 @@ int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params)
void freeHllDevice(void* remoteMatrix)
{
HllDevice *devMat = (HllDevice *) remoteMatrix;
//fprintf(stderr,"freeHllDevice\n");
fprintf(stderr,"freeHllDevice: %p \n",devMat);
if (devMat != NULL) {
fprintf(stderr,"freeHllDevice: doing free(s) %p\n",devMat);
freeRemoteBuffer(devMat->rS);
freeRemoteBuffer(devMat->diag);
freeRemoteBuffer(devMat->rP);
freeRemoteBuffer(devMat->cM);
free(remoteMatrix);
} else {
fprintf(stderr,"Just called FreeHllDevice on a NULL pointer!\n");
}
}

@ -73,12 +73,14 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call trackCudafree(' c_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
call a%set_dev()
call trackCudaAlloc(' c_hlg ',a%sizeof())
else
! This is to guarantee tmp%is_by_rows()
call b%cp_to_coo(tmp,info)
@ -95,7 +97,8 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
@ -104,6 +107,7 @@ subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info)
call tmp%free()
call a%set_dev()
call trackCudaAlloc(' c_hlg ',a%sizeof())
end if
if (info /= 0) goto 9999

@ -33,6 +33,7 @@
subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu
implicit none
@ -52,7 +53,8 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' c_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
end if
#if (CUDA_SHORT_VERSION <= 10 )
@ -369,6 +371,7 @@ subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm)
endif
#endif
call trackCudaAlloc(' c_csrg ',a%sizeof())
call a%set_sync()
if (info /= 0) then

@ -33,6 +33,7 @@
subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use diagdev_mod
use psb_vectordev_mod
use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu
@ -55,12 +56,14 @@ subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm)
!allocsize = a%get_size()
!write(*,*) 'Create the DIAG matrix'
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float)
if (c_associated(a%deviceMat)) then
call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
endif
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float)
if (info == 0) info = &
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
call trackCudaAlloc(' c_diag ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_c_cuda_diag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use elldev_mod
use psb_vectordev_mod
use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu
@ -64,7 +65,8 @@ subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
end if
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaAlloc(' c_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
endif
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1)
@ -79,6 +81,7 @@ subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm)
end if
if (info == 0) info = &
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
call trackCudaAlloc(' c_elg ',a%sizeof())
call a%set_sync()
end subroutine psb_c_cuda_elg_to_gpu

@ -32,6 +32,7 @@
subroutine psb_c_cuda_hdiag_to_gpu(a,info)
use psb_base_mod
use psb_cuda_env_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu
@ -65,12 +66,13 @@ subroutine psb_c_cuda_hdiag_to_gpu(a,info)
end if
if (c_associated(a%deviceMat)) then
call freeHdiagDevice(a%deviceMat)
call trackCudaFree(' c_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
endif
info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
& allocheight,hacksize,hackCount,spgpu_type_double)
if (info == 0) info = &
& writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
call trackCudaAlloc(' c_hdiag ',a%sizeof())
end subroutine psb_c_cuda_hdiag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use hlldev_mod
use psb_vectordev_mod
use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu
@ -51,11 +52,13 @@ subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm)
allocsize = a%get_size()
nza = a%get_nzeros()
if (c_associated(a%deviceMat)) then
call freehllDevice(a%deviceMat)
call trackCudaFree(' to_gpu c_hlg ',a%sizeof())
call freehllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_float,1)
if (info == 0) info = &
& writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
call trackCudaAlloc(' c_hlg ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_c_cuda_hlg_to_gpu

@ -33,6 +33,7 @@
subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu
implicit none
@ -53,6 +54,7 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm)
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' c_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
end if
if (a%is_unit()) then
@ -139,6 +141,7 @@ subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm)
info = HYBGDeviceHybsmAnalysis(a%deviceMat)
end if
call trackCudaAlloc(' c_hybg ',a%sizeof())
if (info /= 0) then
write(0,*) 'Error in HYBG_TO_GPU ',info

@ -73,12 +73,14 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info)
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call trackCudafree(' d_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
call a%set_dev()
call trackCudaAlloc(' d_hlg ',a%sizeof())
else
! This is to guarantee tmp%is_by_rows()
call b%cp_to_coo(tmp,info)
@ -95,7 +97,8 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info)
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
@ -104,6 +107,7 @@ subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info)
call tmp%free()
call a%set_dev()
call trackCudaAlloc(' d_hlg ',a%sizeof())
end if
if (info /= 0) goto 9999

@ -33,6 +33,7 @@
subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu
implicit none
@ -52,7 +53,8 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' d_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
end if
#if (CUDA_SHORT_VERSION <= 10 )
@ -369,6 +371,7 @@ subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm)
endif
#endif
call trackCudaAlloc(' d_csrg ',a%sizeof())
call a%set_sync()
if (info /= 0) then

@ -33,6 +33,7 @@
subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use diagdev_mod
use psb_vectordev_mod
use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu
@ -55,12 +56,14 @@ subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm)
!allocsize = a%get_size()
!write(*,*) 'Create the DIAG matrix'
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_double)
if (c_associated(a%deviceMat)) then
call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
endif
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_double)
if (info == 0) info = &
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
call trackCudaAlloc(' d_diag ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_d_cuda_diag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use elldev_mod
use psb_vectordev_mod
use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu
@ -64,7 +65,8 @@ subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm)
end if
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaAlloc(' d_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
endif
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_double,1)
@ -79,6 +81,7 @@ subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm)
end if
if (info == 0) info = &
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
call trackCudaAlloc(' d_elg ',a%sizeof())
call a%set_sync()
end subroutine psb_d_cuda_elg_to_gpu

@ -32,6 +32,7 @@
subroutine psb_d_cuda_hdiag_to_gpu(a,info)
use psb_base_mod
use psb_cuda_env_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu
@ -65,12 +66,13 @@ subroutine psb_d_cuda_hdiag_to_gpu(a,info)
end if
if (c_associated(a%deviceMat)) then
call freeHdiagDevice(a%deviceMat)
call trackCudaFree(' d_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
endif
info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
& allocheight,hacksize,hackCount,spgpu_type_double)
if (info == 0) info = &
& writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
call trackCudaAlloc(' d_hdiag ',a%sizeof())
end subroutine psb_d_cuda_hdiag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use hlldev_mod
use psb_vectordev_mod
use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu
@ -51,11 +52,13 @@ subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm)
allocsize = a%get_size()
nza = a%get_nzeros()
if (c_associated(a%deviceMat)) then
call freehllDevice(a%deviceMat)
call trackCudaFree(' to_gpu d_hlg ',a%sizeof())
call freehllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_double,1)
if (info == 0) info = &
& writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
call trackCudaAlloc(' d_hlg ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_d_cuda_hlg_to_gpu

@ -33,6 +33,7 @@
subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu
implicit none
@ -53,6 +54,7 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm)
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' d_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
end if
if (a%is_unit()) then
@ -139,6 +141,7 @@ subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm)
info = HYBGDeviceHybsmAnalysis(a%deviceMat)
end if
call trackCudaAlloc(' d_hybg ',a%sizeof())
if (info /= 0) then
write(0,*) 'Error in HYBG_TO_GPU ',info

@ -73,12 +73,14 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info)
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call trackCudafree(' s_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
call a%set_dev()
call trackCudaAlloc(' s_hlg ',a%sizeof())
else
! This is to guarantee tmp%is_by_rows()
call b%cp_to_coo(tmp,info)
@ -95,7 +97,8 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info)
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
@ -104,6 +107,7 @@ subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info)
call tmp%free()
call a%set_dev()
call trackCudaAlloc(' s_hlg ',a%sizeof())
end if
if (info /= 0) goto 9999

@ -33,6 +33,7 @@
subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu
implicit none
@ -52,7 +53,8 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' s_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
end if
#if (CUDA_SHORT_VERSION <= 10 )
@ -369,6 +371,7 @@ subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm)
endif
#endif
call trackCudaAlloc(' s_csrg ',a%sizeof())
call a%set_sync()
if (info /= 0) then

@ -33,6 +33,7 @@
subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use diagdev_mod
use psb_vectordev_mod
use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu
@ -55,12 +56,14 @@ subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm)
!allocsize = a%get_size()
!write(*,*) 'Create the DIAG matrix'
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_float)
if (c_associated(a%deviceMat)) then
call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
endif
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_float)
if (info == 0) info = &
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
call trackCudaAlloc(' s_diag ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_s_cuda_diag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use elldev_mod
use psb_vectordev_mod
use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu
@ -64,7 +65,8 @@ subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm)
end if
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaAlloc(' s_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
endif
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_float,1)
@ -79,6 +81,7 @@ subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm)
end if
if (info == 0) info = &
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
call trackCudaAlloc(' s_elg ',a%sizeof())
call a%set_sync()
end subroutine psb_s_cuda_elg_to_gpu

@ -32,6 +32,7 @@
subroutine psb_s_cuda_hdiag_to_gpu(a,info)
use psb_base_mod
use psb_cuda_env_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu
@ -65,12 +66,13 @@ subroutine psb_s_cuda_hdiag_to_gpu(a,info)
end if
if (c_associated(a%deviceMat)) then
call freeHdiagDevice(a%deviceMat)
call trackCudaFree(' s_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
endif
info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
& allocheight,hacksize,hackCount,spgpu_type_double)
if (info == 0) info = &
& writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
call trackCudaAlloc(' s_hdiag ',a%sizeof())
end subroutine psb_s_cuda_hdiag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use hlldev_mod
use psb_vectordev_mod
use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu
@ -51,11 +52,13 @@ subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm)
allocsize = a%get_size()
nza = a%get_nzeros()
if (c_associated(a%deviceMat)) then
call freehllDevice(a%deviceMat)
call trackCudaFree(' to_gpu s_hlg ',a%sizeof())
call freehllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_float,1)
if (info == 0) info = &
& writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
call trackCudaAlloc(' s_hlg ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_s_cuda_hlg_to_gpu

@ -33,6 +33,7 @@
subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu
implicit none
@ -53,6 +54,7 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm)
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' s_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
end if
if (a%is_unit()) then
@ -139,6 +141,7 @@ subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm)
info = HYBGDeviceHybsmAnalysis(a%deviceMat)
end if
call trackCudaAlloc(' s_hybg ',a%sizeof())
if (info /= 0) then
write(0,*) 'Error in HYBG_TO_GPU ',info

@ -73,12 +73,14 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info)
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
call trackCudafree(' z_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,&
& a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat)
call a%set_dev()
call trackCudaAlloc(' z_hlg ',a%sizeof())
else
! This is to guarantee tmp%is_by_rows()
call b%cp_to_coo(tmp,info)
@ -95,7 +97,8 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info)
end if
if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1))
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1)
@ -104,6 +107,7 @@ subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info)
call tmp%free()
call a%set_dev()
call trackCudaAlloc(' z_hlg ',a%sizeof())
end if
if (info /= 0) goto 9999

@ -33,6 +33,7 @@
subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu
implicit none
@ -52,7 +53,8 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' z_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
end if
#if (CUDA_SHORT_VERSION <= 10 )
@ -369,6 +371,7 @@ subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm)
endif
#endif
call trackCudaAlloc(' z_csrg ',a%sizeof())
call a%set_sync()
if (info /= 0) then

@ -33,6 +33,7 @@
subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use diagdev_mod
use psb_vectordev_mod
use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu
@ -55,12 +56,14 @@ subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm)
!allocsize = a%get_size()
!write(*,*) 'Create the DIAG matrix'
gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_double)
if (c_associated(a%deviceMat)) then
call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
endif
info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_double)
if (info == 0) info = &
& writeDiagDevice(a%deviceMat,a%data,a%offset,n)
call trackCudaAlloc(' z_diag ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_z_cuda_diag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use elldev_mod
use psb_vectordev_mod
use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu
@ -64,7 +65,8 @@ subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm)
end if
if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then
if (c_associated(a%deviceMat)) then
if (c_associated(a%deviceMat)) then
call trackCudaAlloc(' z_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
endif
info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_double,1)
@ -79,6 +81,7 @@ subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm)
end if
if (info == 0) info = &
& writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag)
call trackCudaAlloc(' z_elg ',a%sizeof())
call a%set_sync()
end subroutine psb_z_cuda_elg_to_gpu

@ -32,6 +32,7 @@
subroutine psb_z_cuda_hdiag_to_gpu(a,info)
use psb_base_mod
use psb_cuda_env_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu
@ -65,12 +66,13 @@ subroutine psb_z_cuda_hdiag_to_gpu(a,info)
end if
if (c_associated(a%deviceMat)) then
call freeHdiagDevice(a%deviceMat)
call trackCudaFree(' z_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
endif
info = FAllocHdiagDevice(a%deviceMat,nr,nc,&
& allocheight,hacksize,hackCount,spgpu_type_double)
if (info == 0) info = &
& writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets)
call trackCudaAlloc(' z_hdiag ',a%sizeof())
end subroutine psb_z_cuda_hdiag_to_gpu

@ -32,6 +32,7 @@
subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use hlldev_mod
use psb_vectordev_mod
use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu
@ -51,11 +52,13 @@ subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm)
allocsize = a%get_size()
nza = a%get_nzeros()
if (c_associated(a%deviceMat)) then
call freehllDevice(a%deviceMat)
call trackCudaFree(' to_gpu z_hlg ',a%sizeof())
call freehllDevice(a%deviceMat)
endif
info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_double,1)
if (info == 0) info = &
& writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag)
call trackCudaAlloc(' z_hlg ',a%sizeof())
! if (info /= 0) goto 9999
end subroutine psb_z_cuda_hlg_to_gpu

@ -33,6 +33,7 @@
subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm)
use psb_base_mod
use psb_cuda_env_mod
use cusparse_mod
use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu
implicit none
@ -53,6 +54,7 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm)
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
call trackCudaFree(' z_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
end if
if (a%is_unit()) then
@ -139,6 +141,7 @@ subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm)
info = HYBGDeviceHybsmAnalysis(a%deviceMat)
end if
call trackCudaAlloc(' z_hybg ',a%sizeof())
if (info /= 0) then
write(0,*) 'Error in HYBG_TO_GPU ',info

@ -35,7 +35,8 @@ module psb_c_cuda_csrg_mat_mod
use iso_c_binding
use psb_c_mat_mod
use cusparse_mod
use psb_cuda_env_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@ -352,6 +353,7 @@ contains
class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' c_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
call a%psb_c_csr_sparse_mat%free()
@ -366,6 +368,7 @@ contains
type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' c_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
return

@ -34,6 +34,7 @@ module psb_c_cuda_diag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_c_dia_mat_mod
type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat
@ -265,8 +266,10 @@ contains
integer(psb_ipk_) :: info
class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_c_dia_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_c_cuda_dnsg_mat_mod
use iso_c_binding
use psb_c_mat_mod
use psb_c_dns_mat_mod
use psb_cuda_env_mod
use dnsdev_mod
type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat
@ -251,8 +252,10 @@ contains
integer(psb_ipk_) :: info
class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDnsDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_dnsg ',a%sizeof())
call freeDnsDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_c_dns_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_c_cuda_elg_mat_mod
use iso_c_binding
use psb_c_mat_mod
use psb_c_ell_mat_mod
use psb_cuda_env_mod
use psb_i_cuda_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@ -369,8 +370,10 @@ contains
class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeEllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_c_ell_sparse_mat%free()
call a%set_sync()

@ -34,6 +34,7 @@ module psb_c_cuda_hdiag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_c_hdia_mat_mod
type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat
@ -243,8 +244,10 @@ contains
integer(psb_ipk_) :: info
class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHdiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' c_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_c_hdia_sparse_mat%free()

@ -34,6 +34,7 @@ module psb_c_cuda_hlg_mat_mod
use iso_c_binding
use psb_c_mat_mod
use psb_cuda_env_mod
use psb_c_hll_mat_mod
@ -291,8 +292,10 @@ contains
integer(psb_ipk_) :: info
class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' hlg_free c_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_c_hll_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_c_cuda_hybg_mat_mod
use iso_c_binding
use psb_c_mat_mod
use psb_cuda_env_mod
use cusparse_mod
type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat
@ -265,6 +266,7 @@ contains
integer(psb_ipk_) :: info
class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a
call trackCudaFree(' c_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
call a%psb_c_csr_sparse_mat%free()

@ -30,7 +30,7 @@
!
module psb_c_cuda_vect_mod
module psb_c_cuda_vect_mod
use iso_c_binding
use psb_const_mod
use psb_error_mod
@ -728,12 +728,14 @@ contains
end if
if (c_associated(x%deviceVect)) then
nd = getMultiVecDeviceSize(x%deviceVect)
if (nd < nh ) then
if (nd < nh ) then
call trackCudaFree(' c_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' c_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,nh,spgpu_type_complex_float)
if (info /= 0) then
if (info == spgpu_outofmem) then
@ -755,6 +757,7 @@ contains
if (x%is_host()) then
if (.not.c_associated(x%deviceVect)) then
n = size(x%v)
call trackCudaAlloc(' c_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,n,spgpu_type_complex_float)
end if
if (info == 0) &
@ -790,6 +793,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if (c_associated(x%deviceVect)) then
!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice'
call trackCudaFree(' c_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1972,12 +1976,14 @@ contains
md = getMultiVecDevicePitch(x%deviceVect)
nd = getMultiVecDeviceCount(x%deviceVect)
if ((md < mh).or.(nd<nh)) then
call trackCudaFree(' c_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' c_multivect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,nh,mh,spgpu_type_complex_float)
if (info == 0) &
& call psb_realloc(getMultiVecDevicePitch(x%deviceVect),&
@ -2042,6 +2048,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' c_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -2063,6 +2070,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' c_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if

@ -176,7 +176,27 @@ module psb_cuda_env_mod
logical, private :: gpu_do_maybe_free_buffer = .false.
integer(psb_epk_), save :: total_cuda_mem=0
Contains
subroutine trackCudaAlloc(data,size)
integer(psb_epk_), intent(in) :: size
character(len=*), intent(in) :: data
total_cuda_mem = total_cuda_mem + size
write(0,*) 'Tracking cuda Alloc for data ',&
& data,' size ',size,' total ',total_cuda_mem
end subroutine trackCudaAlloc
subroutine trackCudaFree(data,size)
integer(psb_epk_), intent(in) :: size
character(len=*), intent(in) :: data
total_cuda_mem = total_cuda_mem - size
write(0,*) 'Tracking cuda Free for data ',&
& data,' size ',size,' total ',total_cuda_mem
end subroutine trackCudaFree
function psb_cuda_get_maybe_free_buffer() result(res)
logical :: res

@ -35,7 +35,8 @@ module psb_d_cuda_csrg_mat_mod
use iso_c_binding
use psb_d_mat_mod
use cusparse_mod
use psb_cuda_env_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@ -352,6 +353,7 @@ contains
class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' d_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
call a%psb_d_csr_sparse_mat%free()
@ -366,6 +368,7 @@ contains
type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' d_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
return

@ -34,6 +34,7 @@ module psb_d_cuda_diag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_d_dia_mat_mod
type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat
@ -265,8 +266,10 @@ contains
integer(psb_ipk_) :: info
class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_d_dia_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_d_cuda_dnsg_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_d_dns_mat_mod
use psb_cuda_env_mod
use dnsdev_mod
type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat
@ -251,8 +252,10 @@ contains
integer(psb_ipk_) :: info
class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDnsDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_dnsg ',a%sizeof())
call freeDnsDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_d_dns_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_d_cuda_elg_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_d_ell_mat_mod
use psb_cuda_env_mod
use psb_i_cuda_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@ -369,8 +370,10 @@ contains
class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeEllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_d_ell_sparse_mat%free()
call a%set_sync()

@ -34,6 +34,7 @@ module psb_d_cuda_hdiag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_d_hdia_mat_mod
type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat
@ -243,8 +244,10 @@ contains
integer(psb_ipk_) :: info
class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHdiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' d_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_d_hdia_sparse_mat%free()

@ -34,6 +34,7 @@ module psb_d_cuda_hlg_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_cuda_env_mod
use psb_d_hll_mat_mod
@ -291,8 +292,10 @@ contains
integer(psb_ipk_) :: info
class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' hlg_free d_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_d_hll_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_d_cuda_hybg_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_cuda_env_mod
use cusparse_mod
type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat
@ -265,6 +266,7 @@ contains
integer(psb_ipk_) :: info
class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a
call trackCudaFree(' d_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
call a%psb_d_csr_sparse_mat%free()

@ -728,12 +728,14 @@ contains
end if
if (c_associated(x%deviceVect)) then
nd = getMultiVecDeviceSize(x%deviceVect)
if (nd < nh ) then
if (nd < nh ) then
call trackCudaFree(' d_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' d_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,nh,spgpu_type_double)
if (info /= 0) then
if (info == spgpu_outofmem) then
@ -755,6 +757,7 @@ contains
if (x%is_host()) then
if (.not.c_associated(x%deviceVect)) then
n = size(x%v)
call trackCudaAlloc(' d_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,n,spgpu_type_double)
end if
if (info == 0) &
@ -790,6 +793,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if (c_associated(x%deviceVect)) then
!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice'
call trackCudaFree(' d_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1972,12 +1976,14 @@ contains
md = getMultiVecDevicePitch(x%deviceVect)
nd = getMultiVecDeviceCount(x%deviceVect)
if ((md < mh).or.(nd<nh)) then
call trackCudaFree(' d_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' d_multivect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,nh,mh,spgpu_type_double)
if (info == 0) &
& call psb_realloc(getMultiVecDevicePitch(x%deviceVect),&
@ -2042,6 +2048,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' d_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -2063,6 +2070,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' d_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if

@ -35,7 +35,8 @@ module psb_i_cuda_csrg_mat_mod
use iso_c_binding
use psb_i_mat_mod
use cusparse_mod
use psb_cuda_env_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@ -352,6 +353,7 @@ contains
class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' i_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
call a%psb_i_csr_sparse_mat%free()
@ -366,6 +368,7 @@ contains
type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' i_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
return

@ -34,6 +34,7 @@ module psb_i_cuda_diag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_i_dia_mat_mod
type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat
@ -265,8 +266,10 @@ contains
integer(psb_ipk_) :: info
class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' i_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_i_dia_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_i_cuda_dnsg_mat_mod
use iso_c_binding
use psb_i_mat_mod
use psb_i_dns_mat_mod
use psb_cuda_env_mod
use dnsdev_mod
type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat
@ -251,8 +252,10 @@ contains
integer(psb_ipk_) :: info
class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDnsDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' i_dnsg ',a%sizeof())
call freeDnsDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_i_dns_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_i_cuda_elg_mat_mod
use iso_c_binding
use psb_i_mat_mod
use psb_i_ell_mat_mod
use psb_cuda_env_mod
use psb_i_cuda_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@ -369,8 +370,10 @@ contains
class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeEllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' i_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_i_ell_sparse_mat%free()
call a%set_sync()

@ -34,6 +34,7 @@ module psb_i_cuda_hdiag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_i_hdia_mat_mod
type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat
@ -243,8 +244,10 @@ contains
integer(psb_ipk_) :: info
class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHdiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' i_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_i_hdia_sparse_mat%free()

@ -34,6 +34,7 @@ module psb_i_cuda_hlg_mat_mod
use iso_c_binding
use psb_i_mat_mod
use psb_cuda_env_mod
use psb_i_hll_mat_mod
@ -291,8 +292,10 @@ contains
integer(psb_ipk_) :: info
class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' hlg_free i_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_i_hll_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_i_cuda_hybg_mat_mod
use iso_c_binding
use psb_i_mat_mod
use psb_cuda_env_mod
use cusparse_mod
type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat
@ -265,6 +266,7 @@ contains
integer(psb_ipk_) :: info
class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a
call trackCudaFree(' i_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
call a%psb_i_csr_sparse_mat%free()

@ -710,12 +710,14 @@ contains
end if
if (c_associated(x%deviceVect)) then
nd = getMultiVecDeviceSize(x%deviceVect)
if (nd < nh ) then
if (nd < nh ) then
call trackCudaFree(' i_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' i_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,nh,spgpu_type_int)
if (info /= 0) then
if (info == spgpu_outofmem) then
@ -737,6 +739,7 @@ contains
if (x%is_host()) then
if (.not.c_associated(x%deviceVect)) then
n = size(x%v)
call trackCudaAlloc(' i_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,n,spgpu_type_int)
end if
if (info == 0) &
@ -772,6 +775,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if (c_associated(x%deviceVect)) then
!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice'
call trackCudaFree(' i_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1528,12 +1532,14 @@ contains
md = getMultiVecDevicePitch(x%deviceVect)
nd = getMultiVecDeviceCount(x%deviceVect)
if ((md < mh).or.(nd<nh)) then
call trackCudaFree(' i_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' i_multivect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,nh,mh,spgpu_type_int)
if (info == 0) &
& call psb_realloc(getMultiVecDevicePitch(x%deviceVect),&
@ -1598,6 +1604,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' i_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1619,6 +1626,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' i_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if

@ -35,7 +35,8 @@ module psb_s_cuda_csrg_mat_mod
use iso_c_binding
use psb_s_mat_mod
use cusparse_mod
use psb_cuda_env_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@ -352,6 +353,7 @@ contains
class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' s_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
call a%psb_s_csr_sparse_mat%free()
@ -366,6 +368,7 @@ contains
type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' s_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
return

@ -34,6 +34,7 @@ module psb_s_cuda_diag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_s_dia_mat_mod
type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat
@ -265,8 +266,10 @@ contains
integer(psb_ipk_) :: info
class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_s_dia_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_s_cuda_dnsg_mat_mod
use iso_c_binding
use psb_s_mat_mod
use psb_s_dns_mat_mod
use psb_cuda_env_mod
use dnsdev_mod
type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat
@ -251,8 +252,10 @@ contains
integer(psb_ipk_) :: info
class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDnsDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_dnsg ',a%sizeof())
call freeDnsDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_s_dns_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_s_cuda_elg_mat_mod
use iso_c_binding
use psb_s_mat_mod
use psb_s_ell_mat_mod
use psb_cuda_env_mod
use psb_i_cuda_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@ -369,8 +370,10 @@ contains
class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeEllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_s_ell_sparse_mat%free()
call a%set_sync()

@ -34,6 +34,7 @@ module psb_s_cuda_hdiag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_s_hdia_mat_mod
type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat
@ -243,8 +244,10 @@ contains
integer(psb_ipk_) :: info
class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHdiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' s_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_s_hdia_sparse_mat%free()

@ -34,6 +34,7 @@ module psb_s_cuda_hlg_mat_mod
use iso_c_binding
use psb_s_mat_mod
use psb_cuda_env_mod
use psb_s_hll_mat_mod
@ -291,8 +292,10 @@ contains
integer(psb_ipk_) :: info
class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' hlg_free s_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_s_hll_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_s_cuda_hybg_mat_mod
use iso_c_binding
use psb_s_mat_mod
use psb_cuda_env_mod
use cusparse_mod
type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat
@ -265,6 +266,7 @@ contains
integer(psb_ipk_) :: info
class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a
call trackCudaFree(' s_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
call a%psb_s_csr_sparse_mat%free()

@ -728,12 +728,14 @@ contains
end if
if (c_associated(x%deviceVect)) then
nd = getMultiVecDeviceSize(x%deviceVect)
if (nd < nh ) then
if (nd < nh ) then
call trackCudaFree(' s_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' s_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,nh,spgpu_type_float)
if (info /= 0) then
if (info == spgpu_outofmem) then
@ -755,6 +757,7 @@ contains
if (x%is_host()) then
if (.not.c_associated(x%deviceVect)) then
n = size(x%v)
call trackCudaAlloc(' s_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,n,spgpu_type_float)
end if
if (info == 0) &
@ -790,6 +793,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if (c_associated(x%deviceVect)) then
!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice'
call trackCudaFree(' s_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1972,12 +1976,14 @@ contains
md = getMultiVecDevicePitch(x%deviceVect)
nd = getMultiVecDeviceCount(x%deviceVect)
if ((md < mh).or.(nd<nh)) then
call trackCudaFree(' s_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' s_multivect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,nh,mh,spgpu_type_float)
if (info == 0) &
& call psb_realloc(getMultiVecDevicePitch(x%deviceVect),&
@ -2042,6 +2048,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' s_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -2063,6 +2070,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' s_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if

@ -35,7 +35,8 @@ module psb_z_cuda_csrg_mat_mod
use iso_c_binding
use psb_z_mat_mod
use cusparse_mod
use psb_cuda_env_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@ -352,6 +353,7 @@ contains
class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' z_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
call a%psb_z_csr_sparse_mat%free()
@ -366,6 +368,7 @@ contains
type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a
call trackCudaFree(' z_csrg ',a%sizeof())
info = CSRGDeviceFree(a%deviceMat)
return

@ -34,6 +34,7 @@ module psb_z_cuda_diag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_z_dia_mat_mod
type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat
@ -265,8 +266,10 @@ contains
integer(psb_ipk_) :: info
class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_diag ',a%sizeof())
call freeDiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_z_dia_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_z_cuda_dnsg_mat_mod
use iso_c_binding
use psb_z_mat_mod
use psb_z_dns_mat_mod
use psb_cuda_env_mod
use dnsdev_mod
type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat
@ -251,8 +252,10 @@ contains
integer(psb_ipk_) :: info
class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeDnsDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_dnsg ',a%sizeof())
call freeDnsDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_z_dns_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_z_cuda_elg_mat_mod
use iso_c_binding
use psb_z_mat_mod
use psb_z_ell_mat_mod
use psb_cuda_env_mod
use psb_i_cuda_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@ -369,8 +370,10 @@ contains
class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeEllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_elg ',a%sizeof())
call freeEllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_z_ell_sparse_mat%free()
call a%set_sync()

@ -34,6 +34,7 @@ module psb_z_cuda_hdiag_mat_mod
use iso_c_binding
use psb_base_mod
use psb_cuda_env_mod
use psb_z_hdia_mat_mod
type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat
@ -243,8 +244,10 @@ contains
integer(psb_ipk_) :: info
class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHdiagDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' z_hdiag ',a%sizeof())
call freeHdiagDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_z_hdia_sparse_mat%free()

@ -34,6 +34,7 @@ module psb_z_cuda_hlg_mat_mod
use iso_c_binding
use psb_z_mat_mod
use psb_cuda_env_mod
use psb_z_hll_mat_mod
@ -291,8 +292,10 @@ contains
integer(psb_ipk_) :: info
class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a
if (c_associated(a%deviceMat)) &
& call freeHllDevice(a%deviceMat)
if (c_associated(a%deviceMat)) then
call trackCudaFree(' hlg_free z_hlg ',a%sizeof())
call freeHllDevice(a%deviceMat)
end if
a%deviceMat = c_null_ptr
call a%psb_z_hll_sparse_mat%free()

@ -35,6 +35,7 @@ module psb_z_cuda_hybg_mat_mod
use iso_c_binding
use psb_z_mat_mod
use psb_cuda_env_mod
use cusparse_mod
type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat
@ -265,6 +266,7 @@ contains
integer(psb_ipk_) :: info
class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a
call trackCudaFree(' z_hybg ',a%sizeof())
info = HYBGDeviceFree(a%deviceMat)
call a%psb_z_csr_sparse_mat%free()

@ -728,12 +728,14 @@ contains
end if
if (c_associated(x%deviceVect)) then
nd = getMultiVecDeviceSize(x%deviceVect)
if (nd < nh ) then
if (nd < nh ) then
call trackCudaFree(' z_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' z_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,nh,spgpu_type_complex_double)
if (info /= 0) then
if (info == spgpu_outofmem) then
@ -755,6 +757,7 @@ contains
if (x%is_host()) then
if (.not.c_associated(x%deviceVect)) then
n = size(x%v)
call trackCudaAlloc(' z_vect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,1,n,spgpu_type_complex_double)
end if
if (info == 0) &
@ -790,6 +793,7 @@ contains
if (allocated(x%v)) deallocate(x%v, stat=info)
if (c_associated(x%deviceVect)) then
!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice'
call trackCudaFree(' z_vect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -1972,12 +1976,14 @@ contains
md = getMultiVecDevicePitch(x%deviceVect)
nd = getMultiVecDeviceCount(x%deviceVect)
if ((md < mh).or.(nd<nh)) then
call trackCudaFree(' z_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
end if
if (.not.c_associated(x%deviceVect)) then
if (.not.c_associated(x%deviceVect)) then
call trackCudaAlloc(' z_multivect_cuda ',x%sizeof())
info = FallocMultiVecDevice(x%deviceVect,nh,mh,spgpu_type_complex_double)
if (info == 0) &
& call psb_realloc(getMultiVecDevicePitch(x%deviceVect),&
@ -2042,6 +2048,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' z_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if
@ -2063,6 +2070,7 @@ contains
info = 0
if (c_associated(x%deviceVect)) then
call trackCudaFree(' z_multivect_cuda ',x%sizeof())
call freeMultiVecDevice(x%deviceVect)
x%deviceVect=c_null_ptr
end if

@ -97,7 +97,7 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
else
tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int);
//fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double));
tmp->msize_ = tmp->pitch_*params->count*sizeof(int);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int));
}
else if (params->elementType == SPGPU_TYPE_FLOAT)
@ -106,7 +106,7 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = params->size;
else
tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float);
tmp->msize_ = tmp->pitch_*params->count*sizeof(float);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float));
}
else if (params->elementType == SPGPU_TYPE_DOUBLE)
@ -117,7 +117,7 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
else
tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double);
//fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double));
tmp->msize_ = tmp->pitch_*params->count*sizeof(double);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT)
@ -126,6 +126,7 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = params->size;
else
tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex);
tmp->msize_ = tmp->pitch_*params->count*sizeof(cuFloatComplex);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex));
}
else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE)
@ -134,6 +135,7 @@ int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *
tmp->pitch_ = params->size;
else
tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex);
tmp->msize_ = tmp->pitch_*params->count*sizeof(cuDoubleComplex);
return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex));
}
else
@ -153,7 +155,11 @@ void freeMultiVecDevice(void* deviceVec)
// fprintf(stderr,"freeMultiVecDevice\n");
if (devVec != NULL) {
//fprintf(stderr,"Before freeMultiVecDevice% ld\n",devVec->pitch_*devVec->count_*sizeof(double));
#if 0&& defined(TRACK_CUDA_MALLOC)
freeAndTrackRemoteBuffer(devVec->v_,devVec->msize_);
#else
freeRemoteBuffer(devVec->v_);
#endif
free(deviceVec);
}
}

@ -45,9 +45,12 @@ struct MultiVectDevice
//number of elements for a single vector
int size_;
//pithc in number of elements
//pitch in number of elements
int pitch_;
// malloc size
int msize_;
// Vectors in device memory (single allocation)
void *v_;
};

@ -594,7 +594,7 @@ program pdgenmv
! solver parameters
integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
real(psb_dpk_) :: err, eps
real(psb_dpk_) :: err, eps, td
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_d_coo_sparse_mat), target :: acoo
type(psb_d_csr_sparse_mat), target :: acsr
@ -823,6 +823,28 @@ program pdgenmv
! FIXME: cache flush needed here
call xg%set(x0)
call xg%sync()
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu
td = psb_gedot(xg,bg,desc_a,info)
! For timing purposes we need to make sure all threads
! in the device are done.
if ((info /= 0).or.(psb_get_errstatus()/=0)) then
write(0,*) 'From 1 gedot',info,i,ntests
call psb_error()
stop
end if
end do
call psb_cuda_DeviceSync()
call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2)
write(0,*) 'DOT time : ',gt2,gt2/(ntests*ngpu)
call psb_barrier(ctxt)
gt1 = psb_wtime()
do i=1,ntests*ngpu

Loading…
Cancel
Save