From 08984619dc3f797704376435897274e877af0e4c Mon Sep 17 00:00:00 2001 From: gabrielequatrana Date: Wed, 10 Apr 2024 15:34:57 +0200 Subject: [PATCH] Fixed SpMM for HLG --- cuda/hlldev.c | 30 ++++++++++++++----- cuda/impl/psb_d_cuda_hlg_csmm.F90 | 6 ++-- test/block_krylov/kernel/dpdegenmm.F90 | 41 +++++++++++++------------- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/cuda/hlldev.c b/cuda/hlldev.c index 9da6a48c..f9359bf7 100644 --- a/cuda/hlldev.c +++ b/cuda/hlldev.c @@ -181,6 +181,23 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, return SPGPU_SUCCESS; } +void +dspmdmmhll_gpu (double *z, int s, int vPitch, double *y, double alpha, double* cM, int* rP, + int* rS, int hackSize, int* hackOffs, int avgNnzPerRow, int rows, double *x, double beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; icount_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ #endif - /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, - devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, - devMat->baseIndex);*/ + dspmdmmhll_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_, + alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->hackSize, devMat->hackOffs, + devMat->avgNzr, devMat->rows, + (double *)x->v_, beta, devMat->baseIndex); - spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, - devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, - devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); - //cudaSync(); return SPGPU_SUCCESS; } diff --git a/cuda/impl/psb_d_cuda_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 index ee8424e6..78a1820a 100644 --- a/cuda/impl/psb_d_cuda_hlg_csmm.F90 +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -98,16 +98,16 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) if (info == 0) & & info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double) if (info == 0) & - & info = writeMultiVecDevice(gpX,x,nxy) + & info = writeMultiVecDevice(gpX,x,size(x,1)) if (info == 0) & & info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double) if (info == 0) & - & info = writeMultiVecDevice(gpY,y,nxy) + & info = writeMultiVecDevice(gpY,y,size(y,1)) if (info == 0) & & info = spmvhllDevice(a%deviceMat,alpha,gpX,beta,gpY) if (info == 0) & - & info = readMultiVecDevice(gpY,y,nxy) + & info = readMultiVecDevice(gpY,y,size(y,1)) if (info /= 0) goto 9999 call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpY) diff --git a/test/block_krylov/kernel/dpdegenmm.F90 b/test/block_krylov/kernel/dpdegenmm.F90 index 448fb155..cece5417 100644 --- a/test/block_krylov/kernel/dpdegenmm.F90 +++ b/test/block_krylov/kernel/dpdegenmm.F90 @@ -612,7 +612,6 @@ program pdegenmm #if CUDA_SHORT_VERSION <= 10 type(psb_d_cuda_hybg_sparse_mat), target :: ahybg #endif - ! TODO HLG da fare (complesso) type(psb_d_cuda_hlg_sparse_mat), target :: ahlg ! TODO HDIAG E DNSG non hanno nemmeno CSMM type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag @@ -663,7 +662,7 @@ program pdegenmm !call get_parms(ctxt,nrhs,acfmt,agfmt,idim,tnd) nrhs=2 acfmt='CSR' - agfmt='CSRG' + agfmt='HLG' idim=2 tnd=.false. call psb_init_timers() @@ -806,26 +805,26 @@ program pdegenmm x2 = b_mv_g%get_vect() ! ! TODO test AXPBY - call psb_geall(xg,desc_a,info) - call psb_geasb(xg,desc_a,info,mold=tmold) - call xg%set(done) - call xg%sync() - call psb_geall(bg,desc_a,info) - call psb_geasb(bg,desc_a,info,mold=tmold) - !call bg%set(done+done) - -! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble) - call psb_geaxpby(done,xg,dzero,bg,desc_a,info) - call psb_cuda_DeviceSync() - - write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() - call bg%sync() - write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() - do i=1,8 - write(*,*) bg%v%v(i) - end do +! call psb_geall(xg,desc_a,info) +! call psb_geasb(xg,desc_a,info,mold=tmold) +! call xg%set(done) +! call xg%sync() +! call psb_geall(bg,desc_a,info) +! call psb_geasb(bg,desc_a,info,mold=tmold) +! !call bg%set(done+done) + +! ! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble) +! call psb_geaxpby(done,xg,dzero,bg,desc_a,info) +! call psb_cuda_DeviceSync() + +! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() +! call bg%sync() +! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() +! do i=1,8 +! write(*,*) bg%v%v(i) +! end do - return +! return ! call x_mv_g%set(done) ! call x_mv_g%sync()