Merge branch 'repackage' of github.com:sfilippone/psblas3 into repackage

repack-newsolve
Salvatore Filippone 2 months ago
commit f1d21b1c95

@@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
} }
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i); return(i);
} }
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i); return(i);
} }
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

@@ -37,6 +37,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex); int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
int hfirst, void* host_values, int indexBase, cuFloatComplex beta); int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA); int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);

@@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
status = cublasCgemv(handle, trans, m,k, status = cublasCgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1, (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1); (const cuComplex *) beta, y->v_,1);
} else { } else {
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_); (const cuComplex *) beta, y->v_,y->pitch_);
} }
if (status == CUBLAS_STATUS_SUCCESS) if (status == CUBLAS_STATUS_SUCCESS)
@@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
status = cublasZgemv(handle, trans, m,k, status = cublasZgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1, (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1); (const cuDoubleComplex *) beta, y->v_,1);
} else { } else {
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_); (const cuDoubleComplex *) beta, y->v_,y->pitch_);
} }
if (status == CUBLAS_STATUS_SUCCESS) if (status == CUBLAS_STATUS_SUCCESS)

@@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedDouble(void *, void **, int, double); int registerMappedDouble(void *, void **, int, double);
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);

@@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type; cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_FILL_MODE, CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode, (const void*) &mode,
sizeof(cusparseFillMode_t))); sizeof(cusparseFillMode_t)));
@@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{ {
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type; cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_DIAG_TYPE, CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype, (const void*) &cutype,
sizeof(cusparseDiagType_t))); sizeof(cusparseDiagType_t)));

@@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedInt(void *, void **, int, int); int registerMappedInt(void *, void **, int, int);
int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);

@@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedFloat(void *, void **, int, float); int registerMappedFloat(void *, void **, int, float);
int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);

@@ -34,6 +34,7 @@
#include "cuda_runtime.h" #include "cuda_runtime.h"
//#include "common.h" //#include "common.h"
//#include "cintrf.h" //#include "cintrf.h"
#include "cuda_util.h"
#include <complex.h> #include <complex.h>
struct MultiVectDevice struct MultiVectDevice

@@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
} }
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i); return(i);
} }
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i); return(i);
} }
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

@@ -37,6 +37,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int indexBase, cuDoubleComplex beta); int indexBase, cuDoubleComplex beta);
int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devVecA, void* devVecB); void* devVecA, void* devVecB);

Loading…
Cancel
Save