Fixes for compilation with CUDA 13.0

pull/28/head
sfilippone 9 months ago
parent f78c056a48
commit 390144126c

@@ -51,14 +51,15 @@ lib: objs ilib cudalib spgpulib
$(AR) $(LIBNAME) $(OBJS) $(AR) $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR) /bin/cp -p $(LIBNAME) $(LIBDIR)
$(COBJS): spgpuinc
objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs
/bin/cp -p *$(.mod) $(MODDIR) /bin/cp -p *$(.mod) $(MODDIR)
/bin/cp -p *.h $(INCDIR) /bin/cp -p *.h $(INCDIR)
#$(COBJS): spgpuinc
spgpuinc: spgpuinc:
$(MAKE) -C spgpu includes $(MAKE) -C spgpu incl
spgpuobjs: spgpuobjs:
$(MAKE) -C spgpu objs $(MAKE) -C spgpu objs
spgpulib: spgpulib:

@@ -37,6 +37,9 @@ static int hasUVA=-1;
static struct cudaDeviceProp *prop=NULL; static struct cudaDeviceProp *prop=NULL;
static spgpuHandle_t psb_cuda_handle = NULL; static spgpuHandle_t psb_cuda_handle = NULL;
static cublasHandle_t psb_cublas_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL;
#if PSB_CUDA_VERSION >= 13000
static int memoryClockRate=-1;
#endif
#if defined(TRACK_CUDA_MALLOC) #if defined(TRACK_CUDA_MALLOC)
static int64_t total_cuda_mem = 0; static int64_t total_cuda_mem = 0;
#endif #endif
@@ -310,17 +313,28 @@ int getGPUMultiProcessors()
int getGPUMemoryBusWidth() int getGPUMemoryBusWidth()
{ int count=0; { int count=0;
#if CUDART_VERSION >= 5000 #if PSB_CUDA_VERSION >= 5000
if (prop!=NULL) if (prop!=NULL)
count = prop->memoryBusWidth; count = prop->memoryBusWidth;
#endif #endif
return(count); return(count);
} }
#if PSB_CUDA_VERSION >= 13000
int getGPUMemoryClockRate(int dev)
#else
int getGPUMemoryClockRate() int getGPUMemoryClockRate()
#endif
{ int count=0; { int count=0;
#if CUDART_VERSION >= 5000 #if PSB_CUDA_VERSION >= 5000
#if PSB_CUDA_VERSION >= 13000
cudaDeviceGetAttribute(&memoryClockRate,
cudaDevAttrMemoryClockRate, dev);
count = memoryClockRate;
#else
if (prop!=NULL) if (prop!=NULL)
count = prop->memoryClockRate; count = prop->memoryClockRate;
#endif
#endif #endif
return(count); return(count);
} }

@@ -59,7 +59,11 @@ int getDeviceHasUVA();
int setDevice(int dev); int setDevice(int dev);
int getGPUMultiProcessors(); int getGPUMultiProcessors();
int getGPUMemoryBusWidth(); int getGPUMemoryBusWidth();
#if PSB_CUDA_VERSION >= 13000
int getGPUMemoryClockRate(int dev);
#else
int getGPUMemoryClockRate(); int getGPUMemoryClockRate();
#endif
int getGPUWarpSize(); int getGPUWarpSize();
int getGPUMaxThreadsPerBlock(); int getGPUMaxThreadsPerBlock();
int getGPUMaxThreadsPerMP(); int getGPUMaxThreadsPerMP();

@@ -145,11 +145,20 @@ module psb_cuda_env_mod
use iso_c_binding use iso_c_binding
integer(c_int) :: res integer(c_int) :: res
end function psb_C_get_MemoryBusWidth end function psb_C_get_MemoryBusWidth
#if (PSB_CUDA_VERSION >= 13000)
function psb_C_get_MemoryClockRate(dev) &
& result(res) bind(c,name='getGPUMemoryClockRate')
use iso_c_binding
integer(c_int), value :: dev
integer(c_int) :: res
end function psb_C_get_MemoryClockRate
#else
function psb_C_get_MemoryClockRate() & function psb_C_get_MemoryClockRate() &
& result(res) bind(c,name='getGPUMemoryClockRate') & result(res) bind(c,name='getGPUMemoryClockRate')
use iso_c_binding use iso_c_binding
integer(c_int) :: res integer(c_int) :: res
end function psb_C_get_MemoryClockRate end function psb_C_get_MemoryClockRate
#endif
function psb_C_get_WarpSize() & function psb_C_get_WarpSize() &
& result(res) bind(c,name='getGPUWarpSize') & result(res) bind(c,name='getGPUWarpSize')
use iso_c_binding use iso_c_binding
@@ -174,8 +183,9 @@ module psb_cuda_env_mod
end subroutine psb_C_cpy_NameString end subroutine psb_C_cpy_NameString
end interface end interface
logical, private :: gpu_do_maybe_free_buffer = .false. logical, private, save :: gpu_do_maybe_free_buffer = .false.
integer(c_int), private, save :: myDevice
Contains Contains
function psb_cuda_get_maybe_free_buffer() result(res) function psb_cuda_get_maybe_free_buffer() result(res)
@@ -217,6 +227,7 @@ Contains
if (present(dev)) then if (present(dev)) then
info = psb_C_gpu_init(dev) info = psb_C_gpu_init(dev)
myDevice = dev
else else
if (count >0) then if (count >0) then
dev_ = mod(iam,count) dev_ = mod(iam,count)
@@ -224,6 +235,7 @@ Contains
dev_ = 0 dev_ = 0
end if end if
info = psb_C_gpu_init(dev_) info = psb_C_gpu_init(dev_)
myDevice = dev_
end if end if
if (info == 0) info = initFcusparse() if (info == 0) info = initFcusparse()
if (info /= 0) then if (info /= 0) then
@@ -283,7 +295,11 @@ Contains
function psb_cuda_MemoryClockRate() result(res) function psb_cuda_MemoryClockRate() result(res)
integer(psb_ipk_) :: res integer(psb_ipk_) :: res
#if (PSB_CUDA_VERSION >= 13000)
res = psb_C_get_MemoryClockRate(myDevice)
#else
res = psb_C_get_MemoryClockRate() res = psb_C_get_MemoryClockRate()
#endif
end function psb_cuda_MemoryClockRate end function psb_cuda_MemoryClockRate
function psb_cuda_MemoryBusWidth() result(res) function psb_cuda_MemoryBusWidth() result(res)
@@ -296,7 +312,11 @@ Contains
! Formula here: 2*ClockRate(KHz)*BusWidth(bit) ! Formula here: 2*ClockRate(KHz)*BusWidth(bit)
! normalization: bit/byte, KHz/MHz ! normalization: bit/byte, KHz/MHz
! output: MBytes/s ! output: MBytes/s
#if PSB_CUDA_VERSION >= 13000
res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate(myDevice)
#else
res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate()
#endif
end function psb_cuda_MemoryPeakBandwidth end function psb_cuda_MemoryPeakBandwidth
function psb_cuda_DeviceName() result(res) function psb_cuda_DeviceName() result(res)

@@ -11,7 +11,7 @@ LIBNAME=libspgpu.a
OBJS=coo.o core.o dia.o ell.o hdia.o hell.o OBJS=coo.o core.o dia.o ell.o hdia.o hell.o
CU_INCLUDES=-I$(INCDIR) $(CUDA_INCLUDES) CU_INCLUDES=-I$(INCDIR) $(CUDA_INCLUDES)
all: includes objs all: incl objs
objs: $(OBJS) iobjs objs: $(OBJS) iobjs
@@ -24,7 +24,7 @@ iobjs:
ilib: ilib:
$(MAKE) -C kernels lib LIBNAME=$(LIBNAME) $(MAKE) -C kernels lib LIBNAME=$(LIBNAME)
includes: incl:
/bin/cp -p *.h $(INCDIR) /bin/cp -p *.h $(INCDIR)
clean: iclean clean: iclean

@@ -27,7 +27,7 @@ dir:
(if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi)
dpdegenmv.o spdegenmv.o: data_input.o dpdegenmv.o spdegenmv.o: data_input.o
dpdegenmv: $(DPGOBJS) dpdegenmv: $(DPGOBJS)
$(FLINK) $(LOPT) $(DPGOBJS) -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) $(FLINK) $(LOPT) $(DPGOBJS) -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) -lstdc++
/bin/mv dpdegenmv $(EXEDIR) /bin/mv dpdegenmv $(EXEDIR)
spdegenmv: $(SPGOBJS) spdegenmv: $(SPGOBJS)
$(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS)

Loading…
Cancel
Save