Rework configury for CUDA

repack-newsolve
sfilippone 3 months ago
parent ee56c6be3c
commit c74be820ea

@ -80,11 +80,14 @@ LCUDA=@LCUDA@
# CUDA-related build settings. Each @NAME@ placeholder is replaced with the
# value of the matching configure output variable.
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
# NOTE(review): CUDA_DEFINES is assigned a second time here from the same
# placeholder; the repeat looks redundant -- confirm and drop one of them.
CUDA_DEFINES=@CUDA_DEFINES@
# Per-language CUDA define flags, consumed by the Fortran ($(FC)), C ($(CC))
# and C++ ($(CXX)) compile rules respectively.
FCUDEFINES=@FCUDEFINES@
CCUDEFINES=@CCUDEFINES@
CXXCUDEFINES=@CXXCUDEFINES@
# Note the name mismatch: NVCC takes its value from the CUDA_NVCC placeholder.
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@

28
configure vendored

@ -668,6 +668,9 @@ CUDA_SHORT_VERSION
CUDA_VERSION
CUDA_LIBS
CUDA_INCLUDES
CXXCUDEFINES
CCUDEFINES
FCUDEFINES
CUDA_DEFINES
CUDA_DIR
EXTRALDLIBS
@ -10856,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
HAVE_CUDA="yes";
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@ -10879,7 +10883,7 @@ fi
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@ -10891,9 +10895,10 @@ fi
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FCUDEFINES=" ${CUDA_DEFINES}"
CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
@ -10907,8 +10912,12 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi
@ -11373,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@ -11420,6 +11429,9 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)

@ -803,10 +803,11 @@ PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION()
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@ -816,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@ -828,9 +829,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FCUDEFINES=" ${CUDA_DEFINES}"
CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
@ -843,8 +845,12 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi
@ -970,9 +976,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@ -1008,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS)
dnl Publish the CUDA-related shell variables as output variables so that
dnl their @NAME@ placeholders are substituted in the generated files.
AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES)
dnl Per-language define variables; earlier in this script they are set from
dnl CUDA_DEFINES, or cleared when the CUDA settings are reset.
AC_SUBST(FCUDEFINES)
AC_SUBST(CCUDEFINES)
AC_SUBST(CXXCUDEFINES)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION)

@ -141,3 +141,12 @@ spgpuclean:
$(MAKE) -C spgpu clean
veryclean: clean
# Old-style suffix rules that compile one source file into one object.
# Each recipe passes the language-specific CUDA defines right after the
# $(CCOPT)/$(FCOPT)/$(CXXOPT) flags, ahead of the include and define flags.
# NOTE(review): suffix rules only apply to suffixes listed in .SUFFIXES; that
# declaration is not visible here -- confirm .f90 and .F90 are covered.

# C source -> object.
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
# Fortran .f90 source -> object; $(FDEFINES) is not passed on this rule.
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
# Fortran .F90 source -> object; same as above plus $(FDEFINES).
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
# C++ source -> object.
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -295,3 +295,12 @@ lib: objs
# Remove the object files listed in $(OBJS); -f keeps rm quiet when a file
# is already missing.
clean:
/bin/rm -f $(OBJS)
# Old-style suffix rules that compile one source file into one object.
# Each recipe passes the language-specific CUDA defines right after the
# $(CCOPT)/$(FCOPT)/$(CXXOPT) flags, ahead of the include and define flags.
# NOTE(review): suffix rules only apply to suffixes listed in .SUFFIXES; that
# declaration is not visible here -- confirm .f90 and .F90 are covered.

# C source -> object.
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
# Fortran .f90 source -> object; $(FDEFINES) is not passed on this rule.
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
# Fortran .F90 source -> object; same as above plus $(FDEFINES).
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
# C++ source -> object.
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
{
#endif
#ifdef ASSUME_LOCK_SYNC_PARALLELISM
volatile float* vsSum = sSum;
vsSum[threadIdx.x] = res;

@ -41,3 +41,8 @@ lib:
(cd ../../; make library)
# Run the veryclean target of the makefile two directories up.
# "&&" stops the recipe if the cd fails, so the sub-make can never run in the
# wrong directory; $(MAKE) (rather than a literal "make") lets options such as
# -n and the -j jobserver reach the sub-make.
verycleanlib:
	(cd ../.. && $(MAKE) veryclean)
# Pattern rules that build an object from a Fortran source of the same stem.
# Both variants run the same recipe, appending $(FCUDEFINES) after the
# include flags.
# NOTE(review): no $(FDEFINES) is passed, not even for the .F90 case --
# confirm $(FFLAGS) already carries whatever defines these sources need.
%.o: %.F90
$(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@
%.o: %.f90
$(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@

Loading…
Cancel
Save