From c74be820ea1ef6556168359f445db8a1f274d038 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 8 Oct 2024 11:48:15 +0200 Subject: [PATCH] Rework configry for CUDA --- Make.inc.in | 5 ++++- configure | 28 ++++++++++++++++++++-------- configure.ac | 27 ++++++++++++++++++--------- cuda/Makefile | 9 +++++++++ cuda/impl/Makefile | 9 +++++++++ cuda/spgpu/kernels/sdot.cu | 1 - test/cudakern/Makefile | 5 +++++ 7 files changed, 65 insertions(+), 19 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index 38c8ef86..a62abd50 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -80,11 +80,14 @@ LCUDA=@LCUDA@ SPGPU_LIBS=@SPGPU_LIBS@ CUDA_DIR=@CUDA_DIR@ -CUDA_DEFINES=@CUDA_DEFINES@ CUDA_INCLUDES=@CUDA_INCLUDES@ CUDA_LIBS=@CUDA_LIBS@ CUDA_VERSION=@CUDA_VERSION@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ +CUDA_DEFINES=@CUDA_DEFINES@ +FCUDEFINES=@FCUDEFINES@ +CCUDEFINES=@CCUDEFINES@ +CXXCUDEFINES=@CXXCUDEFINES@ NVCC=@CUDA_NVCC@ CUDEFINES=@CUDEFINES@ diff --git a/configure b/configure index dbc419fe..de173d6e 100755 --- a/configure +++ b/configure @@ -668,6 +668,9 @@ CUDA_SHORT_VERSION CUDA_VERSION CUDA_LIBS CUDA_INCLUDES +CXXCUDEFINES +CCUDEFINES +FCUDEFINES CUDA_DEFINES CUDA_DIR EXTRALDLIBS @@ -10856,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu LIBS="$SAVE_LIBS" CPPFLAGS="$SAVE_CPPFLAGS" + HAVE_CUDA="yes"; CUDA_VERSION="$pac_cv_cuda_version"; CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); - HAVE_CUDA="yes"; + CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; @@ -10879,7 +10883,7 @@ fi if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="50,60,70,75"; + pac_cv_cudacc="50,60,70,75,80,86"; CUDA_CC="$pac_cv_cudacc"; fi if (( $pac_cv_cuda_version >= 11070 )) @@ -10891,9 +10895,10 @@ fi CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; done if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CUDEFINES="$CUDEFINES ${CUDA_DEFINES}" + FCUDEFINES=" ${CUDA_DEFINES}" + CCUDEFINES=" ${CUDA_DEFINES}" + CXXCUDEFINES=" ${CUDA_DEFINES}" fi fi @@ -10907,8 +10912,12 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ CUDAD=""; CUDALD=""; CUDEFINES=""; + CUDA_DEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; fi fi @@ -11373,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) +CDEFINES=$(PSBCDEFINES) +FDEFINES=$(PSBFDEFINES) # These should be portable rules, arent they? @@ -11420,6 +11429,9 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) + + + diff --git a/configure.ac b/configure.ac index 5966801c..c670f6c8 100755 --- a/configure.ac +++ b/configure.ac @@ -803,10 +803,11 @@ PAC_CHECK_CUDA() if test "x$pac_cv_have_cuda" == "xyes"; then PAC_CHECK_CUDA_VERSION() - CUDA_VERSION="$pac_cv_cuda_version"; - CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); dnl PAC_CHECK_SPGPU() HAVE_CUDA="yes"; + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"; SPGPU_LIBS="-lspgpu"; CUDAD=cudad; CUDALD=cudald; @@ -816,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then PAC_ARG_WITH_CUDACC() if test "x$pac_cv_cudacc" == "x"; then - pac_cv_cudacc="50,60,70,75"; + pac_cv_cudacc="50,60,70,75,80,86"; CUDA_CC="$pac_cv_cudacc"; fi if (( $pac_cv_cuda_version >= 11070 )) @@ -828,9 +829,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; done if test "x$pac_cv_cuda_version" != "xunknown"; then - CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" - CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CUDEFINES="$CUDEFINES ${CUDA_DEFINES}" + FCUDEFINES=" ${CUDA_DEFINES}" + CCUDEFINES=" ${CUDA_DEFINES}" + CXXCUDEFINES=" ${CUDA_DEFINES}" fi fi @@ -843,8 +845,12 @@ if test "x$pac_cv_ipk_size" != "x4"; then CUDAD=""; CUDALD=""; CUDEFINES=""; + CUDA_DEFINES=""; CUDA_INCLUDES=""; CUDA_LIBS=""; + FCUDEFINES=""; + CCUDEFINES=""; + CXXCUDEFINES=""; fi fi @@ -970,9 +976,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) -CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) -FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) +CXXDEFINES=$(PSBCXXDEFINES) +CDEFINES=$(PSBCDEFINES) +FDEFINES=$(PSBFDEFINES) # These should be portable rules, arent they? @@ -1008,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR) AC_SUBST(EXTRALDLIBS) AC_SUBST(CUDA_DIR) AC_SUBST(CUDA_DEFINES) +AC_SUBST(FCUDEFINES) +AC_SUBST(CCUDEFINES) +AC_SUBST(CXXCUDEFINES) AC_SUBST(CUDA_INCLUDES) AC_SUBST(CUDA_LIBS) AC_SUBST(CUDA_VERSION) diff --git a/cuda/Makefile b/cuda/Makefile index 7e428629..a6757fe7 100755 --- a/cuda/Makefile +++ b/cuda/Makefile @@ -141,3 +141,12 @@ spgpuclean: $(MAKE) -C spgpu clean veryclean: clean + +.c.o: + $(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@ diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile index 12bf0747..9ceb4575 100755 --- a/cuda/impl/Makefile +++ b/cuda/impl/Makefile @@ -295,3 +295,12 @@ lib: objs clean: /bin/rm -f $(OBJS) + +.c.o: + $(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@ diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu index c19c7710..f91fab43 100644 --- a/cuda/spgpu/kernels/sdot.cu +++ b/cuda/spgpu/kernels/sdot.cu @@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y) { #endif - #ifdef ASSUME_LOCK_SYNC_PARALLELISM volatile float* vsSum = sSum; vsSum[threadIdx.x] = res; diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile index 41cef197..4b8091d5 100755 --- a/test/cudakern/Makefile +++ b/test/cudakern/Makefile @@ -41,3 +41,8 @@ lib: (cd ../../; make library) verycleanlib: (cd ../../; make veryclean) + +%.o: %.F90 + $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@ +%.o: %.f90 + $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@