Rework configury for CUDA

repack-newsolve
sfilippone 4 months ago
parent ee56c6be3c
commit c74be820ea

@ -80,11 +80,14 @@ LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@ CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@ CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@ CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@ CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
CUDA_DEFINES=@CUDA_DEFINES@
FCUDEFINES=@FCUDEFINES@
CCUDEFINES=@CCUDEFINES@
CXXCUDEFINES=@CXXCUDEFINES@
NVCC=@CUDA_NVCC@ NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@ CUDEFINES=@CUDEFINES@

28
configure vendored

@ -668,6 +668,9 @@ CUDA_SHORT_VERSION
CUDA_VERSION CUDA_VERSION
CUDA_LIBS CUDA_LIBS
CUDA_INCLUDES CUDA_INCLUDES
CXXCUDEFINES
CCUDEFINES
FCUDEFINES
CUDA_DEFINES CUDA_DEFINES
CUDA_DIR CUDA_DIR
EXTRALDLIBS EXTRALDLIBS
@ -10856,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS" LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS" CPPFLAGS="$SAVE_CPPFLAGS"
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version"; CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
HAVE_CUDA="yes"; CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu"; SPGPU_LIBS="-lspgpu";
CUDAD=cudad; CUDAD=cudad;
CUDALD=cudald; CUDALD=cudald;
@ -10879,7 +10883,7 @@ fi
if test "x$pac_cv_cudacc" == "x"; then if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75"; pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc"; CUDA_CC="$pac_cv_cudacc";
fi fi
if (( $pac_cv_cuda_version >= 11070 )) if (( $pac_cv_cuda_version >= 11070 ))
@ -10891,9 +10895,10 @@ fi
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done done
if test "x$pac_cv_cuda_version" != "xunknown"; then if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" FCUDEFINES=" ${CUDA_DEFINES}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi fi
fi fi
@ -10907,8 +10912,12 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDAD=""; CUDAD="";
CUDALD=""; CUDALD="";
CUDEFINES=""; CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES=""; CUDA_INCLUDES="";
CUDA_LIBS=""; CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi fi
fi fi
@ -11373,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -11420,6 +11429,9 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)

@ -803,10 +803,11 @@ PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION() PAC_CHECK_CUDA_VERSION()
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU() dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes"; HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu"; SPGPU_LIBS="-lspgpu";
CUDAD=cudad; CUDAD=cudad;
CUDALD=cudald; CUDALD=cudald;
@ -816,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_ARG_WITH_CUDACC() PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75"; pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc"; CUDA_CC="$pac_cv_cudacc";
fi fi
if (( $pac_cv_cuda_version >= 11070 )) if (( $pac_cv_cuda_version >= 11070 ))
@ -828,9 +829,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done done
if test "x$pac_cv_cuda_version" != "xunknown"; then if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" FCUDEFINES=" ${CUDA_DEFINES}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi fi
fi fi
@ -843,8 +845,12 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDAD=""; CUDAD="";
CUDALD=""; CUDALD="";
CUDEFINES=""; CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES=""; CUDA_INCLUDES="";
CUDA_LIBS=""; CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi fi
fi fi
@ -970,9 +976,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -1008,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS) AC_SUBST(EXTRALDLIBS)
AC_SUBST(CUDA_DIR) AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES) AC_SUBST(CUDA_DEFINES)
AC_SUBST(FCUDEFINES)
AC_SUBST(CCUDEFINES)
AC_SUBST(CXXCUDEFINES)
AC_SUBST(CUDA_INCLUDES) AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS) AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION) AC_SUBST(CUDA_VERSION)

@ -141,3 +141,12 @@ spgpuclean:
$(MAKE) -C spgpu clean $(MAKE) -C spgpu clean
veryclean: clean veryclean: clean
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -295,3 +295,12 @@ lib: objs
clean: clean:
/bin/rm -f $(OBJS) /bin/rm -f $(OBJS)
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
{ {
#endif #endif
#ifdef ASSUME_LOCK_SYNC_PARALLELISM #ifdef ASSUME_LOCK_SYNC_PARALLELISM
volatile float* vsSum = sSum; volatile float* vsSum = sSum;
vsSum[threadIdx.x] = res; vsSum[threadIdx.x] = res;

@ -41,3 +41,8 @@ lib:
(cd ../../; make library) (cd ../../; make library)
verycleanlib: verycleanlib:
(cd ../../; make veryclean) (cd ../../; make veryclean)
%.o: %.F90
$(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@
%.o: %.f90
$(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@

Loading…
Cancel
Save