From 390144126cbbdbe74820f46f4bfe80b923d3177d Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 25 Sep 2025 13:35:30 +0200 Subject: [PATCH] Fixes for compilation with CUDA 13.0 --- cuda/Makefile | 5 +++-- cuda/cuda_util.c | 18 ++++++++++++++++-- cuda/cuda_util.h | 4 ++++ cuda/psb_cuda_env_mod.F90 | 24 ++++++++++++++++++++++-- cuda/spgpu/Makefile | 4 ++-- test/cudakern/Makefile | 2 +- 6 files changed, 48 insertions(+), 9 deletions(-) diff --git a/cuda/Makefile b/cuda/Makefile index b47e58ce..5e0aaa28 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -51,14 +51,15 @@ lib: objs ilib cudalib spgpulib $(AR) $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) -$(COBJS): spgpuinc objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs /bin/cp -p *$(.mod) $(MODDIR) /bin/cp -p *.h $(INCDIR) +#$(COBJS): spgpuinc + spgpuinc: - $(MAKE) -C spgpu includes + $(MAKE) -C spgpu incl spgpuobjs: $(MAKE) -C spgpu objs spgpulib: diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c index 8ae2e663..f7414695 100644 --- a/cuda/cuda_util.c +++ b/cuda/cuda_util.c @@ -37,6 +37,9 @@ static int hasUVA=-1; static struct cudaDeviceProp *prop=NULL; static spgpuHandle_t psb_cuda_handle = NULL; static cublasHandle_t psb_cublas_handle = NULL; +#if PSB_CUDA_VERSION >= 13000 +static int memoryClockRate=-1; +#endif #if defined(TRACK_CUDA_MALLOC) static int64_t total_cuda_mem = 0; #endif @@ -310,17 +313,28 @@ int getGPUMultiProcessors() int getGPUMemoryBusWidth() { int count=0; -#if CUDART_VERSION >= 5000 +#if PSB_CUDA_VERSION >= 5000 if (prop!=NULL) count = prop->memoryBusWidth; #endif return(count); } + +#if PSB_CUDA_VERSION >= 13000 +int getGPUMemoryClockRate(int dev) +#else int getGPUMemoryClockRate() +#endif { int count=0; -#if CUDART_VERSION >= 5000 +#if PSB_CUDA_VERSION >= 5000 +#if PSB_CUDA_VERSION >= 13000 + cudaDeviceGetAttribute(&memoryClockRate, + cudaDevAttrMemoryClockRate, dev); + count = memoryClockRate; +#else if (prop!=NULL) count = prop->memoryClockRate; +#endif #endif return(count); } diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h index 609e6f43..0c2cce4f 100644 --- a/cuda/cuda_util.h +++ b/cuda/cuda_util.h @@ -59,7 +59,11 @@ int getDeviceHasUVA(); int setDevice(int dev); int getGPUMultiProcessors(); int getGPUMemoryBusWidth(); +#if PSB_CUDA_VERSION >= 13000 +int getGPUMemoryClockRate(int dev); +#else int getGPUMemoryClockRate(); +#endif int getGPUWarpSize(); int getGPUMaxThreadsPerBlock(); int getGPUMaxThreadsPerMP(); diff --git a/cuda/psb_cuda_env_mod.F90 b/cuda/psb_cuda_env_mod.F90 index d1cee398..58250dbc 100644 --- a/cuda/psb_cuda_env_mod.F90 +++ b/cuda/psb_cuda_env_mod.F90 @@ -145,11 +145,20 @@ module psb_cuda_env_mod use iso_c_binding integer(c_int) :: res end function psb_C_get_MemoryBusWidth +#if (PSB_CUDA_VERSION >= 13000) + function psb_C_get_MemoryClockRate(dev) & + & result(res) bind(c,name='getGPUMemoryClockRate') + use iso_c_binding + integer(c_int), value :: dev + integer(c_int) :: res + end function psb_C_get_MemoryClockRate +#else function psb_C_get_MemoryClockRate() & & result(res) bind(c,name='getGPUMemoryClockRate') use iso_c_binding integer(c_int) :: res end function psb_C_get_MemoryClockRate +#endif function psb_C_get_WarpSize() & & result(res) bind(c,name='getGPUWarpSize') use iso_c_binding @@ -174,8 +183,9 @@ module psb_cuda_env_mod end subroutine psb_C_cpy_NameString end interface - logical, private :: gpu_do_maybe_free_buffer = .false. - + logical, private, save :: gpu_do_maybe_free_buffer = .false. + integer(c_int), private, save :: myDevice + Contains function psb_cuda_get_maybe_free_buffer() result(res) @@ -217,6 +227,7 @@ Contains if (present(dev)) then info = psb_C_gpu_init(dev) + myDevice = dev else if (count >0) then dev_ = mod(iam,count) @@ -224,6 +235,7 @@ Contains dev_ = 0 end if info = psb_C_gpu_init(dev_) + myDevice = dev_ end if if (info == 0) info = initFcusparse() if (info /= 0) then @@ -283,7 +295,11 @@ Contains function psb_cuda_MemoryClockRate() result(res) integer(psb_ipk_) :: res +#if (PSB_CUDA_VERSION >= 13000) + res = psb_C_get_MemoryClockRate(myDevice) +#else res = psb_C_get_MemoryClockRate() +#endif end function psb_cuda_MemoryClockRate function psb_cuda_MemoryBusWidth() result(res) @@ -296,7 +312,11 @@ Contains ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) ! normalization: bit/byte, KHz/MHz ! output: MBytes/s +#if PSB_CUDA_VERSION >= 13000 + res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate(myDevice) +#else res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() +#endif end function psb_cuda_MemoryPeakBandwidth function psb_cuda_DeviceName() result(res) diff --git a/cuda/spgpu/Makefile b/cuda/spgpu/Makefile index ae0b1be6..75302a50 100644 --- a/cuda/spgpu/Makefile +++ b/cuda/spgpu/Makefile @@ -11,7 +11,7 @@ LIBNAME=libspgpu.a OBJS=coo.o core.o dia.o ell.o hdia.o hell.o CU_INCLUDES=-I$(INCDIR) $(CUDA_INCLUDES) -all: includes objs +all: incl objs objs: $(OBJS) iobjs @@ -24,7 +24,7 @@ iobjs: ilib: $(MAKE) -C kernels lib LIBNAME=$(LIBNAME) -includes: +incl: /bin/cp -p *.h $(INCDIR) clean: iclean diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index e1c3b78d..a7752f0a 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -27,7 +27,7 @@ dir: (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) dpdegenmv.o spdegenmv.o: data_input.o dpdegenmv: $(DPGOBJS) - $(FLINK) $(LOPT) $(DPGOBJS) -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + $(FLINK) $(LOPT) $(DPGOBJS) -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) -lstdc++ /bin/mv dpdegenmv $(EXEDIR) spdegenmv: $(SPGOBJS) $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS)