Fixed SpMM for HLG

psblas-bgmres
gabrielequatrana 11 months ago
parent beb418e00b
commit 08984619dc

@ -181,6 +181,23 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
return SPGPU_SUCCESS; return SPGPU_SUCCESS;
} }
/*
 * Multi-vector driver for the HLL sparse matrix-vector product.
 *
 * Invokes spgpuDhellspmv once for each of the `s` vectors, moving the
 * z, y and x pointers forward by `vPitch` elements after every call.
 * All other arguments (alpha, cM, rP, hackSize, hackOffs, rS,
 * avgNnzPerRow, rows, beta, firstIndex) are forwarded unchanged to
 * every call.
 *
 * Nothing is computed when s <= 0; the handle is still fetched.
 *
 * NOTE(review): presumably each call computes z = beta*y + alpha*A*x for
 * one column of a multivector -- confirm against the spgpu documentation.
 * NOTE(review): x is stepped with the same pitch as y and z; this assumes
 * the input and output multivectors share one pitch -- TODO confirm.
 */
void
dspmdmmhll_gpu (double *z, int s, int vPitch, double *y, double alpha, double* cM, int* rP,
                int* rS, int hackSize, int* hackOffs, int avgNnzPerRow, int rows, double *x, double beta, int firstIndex)
{
  spgpuHandle_t spHandle = psb_cudaGetHandle();
  int col;

  /* One product per vector; the three pointers advance together. */
  for (col = 0; col < s; ++col, z += vPitch, y += vPitch, x += vPitch)
    {
      /* The tenth argument is always NULL here. */
      spgpuDhellspmv (spHandle, z, y, alpha, cM, rP,
                      hackSize, hackOffs, rS, NULL,
                      avgNnzPerRow, rows, x, beta, firstIndex);
    }
}
//new //new
int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
double beta, void* deviceY) double beta, void* deviceY)
@ -188,21 +205,18 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
HllDevice *devMat = (HllDevice *) deviceMat; HllDevice *devMat = (HllDevice *) deviceMat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
#ifdef VERBOSE #ifdef VERBOSE
/*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/
/*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
/*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
#endif #endif
/*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, dspmdmmhll_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, alpha, (double *)devMat->cM,
devMat->baseIndex);*/ devMat->rP, devMat->rS, devMat->hackSize, devMat->hackOffs,
devMat->avgNzr, devMat->rows,
(double *)x->v_, beta, devMat->baseIndex);
spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM,
devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
//cudaSync();
return SPGPU_SUCCESS; return SPGPU_SUCCESS;
} }

@ -98,16 +98,16 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans)
if (info == 0) & if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double) & info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
if (info == 0) & if (info == 0) &
& info = writeMultiVecDevice(gpX,x,nxy) & info = writeMultiVecDevice(gpX,x,size(x,1))
if (info == 0) & if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double) & info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
if (info == 0) & if (info == 0) &
& info = writeMultiVecDevice(gpY,y,nxy) & info = writeMultiVecDevice(gpY,y,size(y,1))
if (info == 0) & if (info == 0) &
& info = spmvhllDevice(a%deviceMat,alpha,gpX,beta,gpY) & info = spmvhllDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) & if (info == 0) &
& info = readMultiVecDevice(gpY,y,nxy) & info = readMultiVecDevice(gpY,y,size(y,1))
if (info /= 0) goto 9999 if (info /= 0) goto 9999
call freeMultiVecDevice(gpX) call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY) call freeMultiVecDevice(gpY)

@ -612,7 +612,6 @@ program pdegenmm
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
type(psb_d_cuda_hybg_sparse_mat), target :: ahybg type(psb_d_cuda_hybg_sparse_mat), target :: ahybg
#endif #endif
! TODO HLG da fare (complesso)
type(psb_d_cuda_hlg_sparse_mat), target :: ahlg type(psb_d_cuda_hlg_sparse_mat), target :: ahlg
! TODO HDIAG E DNSG non hanno nemmeno CSMM ! TODO HDIAG E DNSG non hanno nemmeno CSMM
type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag
@ -663,7 +662,7 @@ program pdegenmm
!call get_parms(ctxt,nrhs,acfmt,agfmt,idim,tnd) !call get_parms(ctxt,nrhs,acfmt,agfmt,idim,tnd)
nrhs=2 nrhs=2
acfmt='CSR' acfmt='CSR'
agfmt='CSRG' agfmt='HLG'
idim=2 idim=2
tnd=.false. tnd=.false.
call psb_init_timers() call psb_init_timers()
@ -806,26 +805,26 @@ program pdegenmm
x2 = b_mv_g%get_vect() x2 = b_mv_g%get_vect()
! ! TODO test AXPBY ! ! TODO test AXPBY
call psb_geall(xg,desc_a,info) ! call psb_geall(xg,desc_a,info)
call psb_geasb(xg,desc_a,info,mold=tmold) ! call psb_geasb(xg,desc_a,info,mold=tmold)
call xg%set(done) ! call xg%set(done)
call xg%sync() ! call xg%sync()
call psb_geall(bg,desc_a,info) ! call psb_geall(bg,desc_a,info)
call psb_geasb(bg,desc_a,info,mold=tmold) ! call psb_geasb(bg,desc_a,info,mold=tmold)
!call bg%set(done+done) ! !call bg%set(done+done)
! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble) ! ! ! TODO: Non funziona spgpuDaxpby (axpbyMultiVecDeviceDouble)
call psb_geaxpby(done,xg,dzero,bg,desc_a,info) ! call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
call psb_cuda_DeviceSync() ! call psb_cuda_DeviceSync()
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() ! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
call bg%sync() ! call bg%sync()
write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync() ! write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
do i=1,8 ! do i=1,8
write(*,*) bg%v%v(i) ! write(*,*) bg%v%v(i)
end do ! end do
return ! return
! call x_mv_g%set(done) ! call x_mv_g%set(done)
! call x_mv_g%sync() ! call x_mv_g%sync()

Loading…
Cancel
Save