Fixed SpMM for HLG

11 months ago · 08984619dc
parent beb418e00b
commit 08984619dc
3 changed files with 45 additions and 32 deletions
--- a/cuda/hlldev.c
+++ b/cuda/hlldev.c
@ -181,6 +181,23 @@ int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX,
  return SPGPU_SUCCESS;
 }
 /*
  * Apply the HLL sparse matrix-vector kernel to a group of s vectors,
  * one spgpuDhellspmv call per vector.
  *
  * z, y, x   base pointers of the output, accumulate-input and input
  *           vectors; each is advanced by vPitch elements after every call.
  * s         number of vectors to process (no call is made when s <= 0).
  * vPitch    element stride between two consecutive vectors.
  * alpha, beta, cM, rP, rS, hackSize, hackOffs, avgNnzPerRow, rows,
  * firstIndex
  *           forwarded unchanged to spgpuDhellspmv on every call.
  *
  * NOTE(review): x is stepped with the same vPitch as z and y; this
  * presumably assumes the input multivector shares the output pitch --
  * confirm against the callers.
  */
 void
 dspmdmmhll_gpu (double *z, int s, int vPitch, double *y, double alpha, double* cM, int* rP,
 	     int* rS, int hackSize, int* hackOffs, int avgNnzPerRow, int rows, double *x, double beta, int firstIndex)
 {
   spgpuHandle_t handle = psb_cudaGetHandle();
   int remaining;

   /* Count down the vectors still to process, sliding all three
      pointers forward by one pitch after each kernel invocation. */
   for (remaining = s; remaining > 0; remaining--)
     {
       spgpuDhellspmv (handle, z, y, alpha, cM, rP,
                       hackSize, hackOffs, rS, NULL,
                       avgNnzPerRow, rows, x, beta, firstIndex);
       z += vPitch;
       y += vPitch;
       x += vPitch;
     }
 }
 //new
 int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, 
 		       double beta, void* deviceY)
@ -188,21 +205,18 @@ int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX,
  HllDevice *devMat = (HllDevice *) deviceMat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
  spgpuHandle_t handle=psb_cudaGetHandle();
 #ifdef VERBOSE
  /*__assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of vectors");*/
  /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/
  /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/
 #endif
-  /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, 
+  dspmdmmhll_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_,
-	       devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta,
+           alpha, (double *)devMat->cM, 
-	       devMat->baseIndex);*/
+	       devMat->rP, devMat->rS, devMat->hackSize, devMat->hackOffs,
           devMat->avgNzr, devMat->rows,
           (double *)x->v_, beta, devMat->baseIndex);
  spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, 
 		  devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL,
 		  devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex);
  //cudaSync();
  return SPGPU_SUCCESS;
 }
--- a/cuda/impl/psb_d_cuda_hlg_csmm.F90
+++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90
@ -98,16 +98,16 @@ subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans)
    if (info == 0) &
         & info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
    if (info == 0) &
-         & info = writeMultiVecDevice(gpX,x,nxy)
+         & info = writeMultiVecDevice(gpX,x,size(x,1))
    if (info == 0) &
         & info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
    if (info == 0) &
-         & info = writeMultiVecDevice(gpY,y,nxy)
+         & info = writeMultiVecDevice(gpY,y,size(y,1))
    if (info == 0)  &
         & info = spmvhllDevice(a%deviceMat,alpha,gpX,beta,gpY)
    if (info == 0) &
-         & info = readMultiVecDevice(gpY,y,nxy)
+         & info = readMultiVecDevice(gpY,y,size(y,1))
    if (info /= 0) goto 9999
    call freeMultiVecDevice(gpX)
    call freeMultiVecDevice(gpY)
--- a/test/block_krylov/kernel/dpdegenmm.F90
+++ b/test/block_krylov/kernel/dpdegenmm.F90
@ -612,7 +612,6 @@ program pdegenmm
 #if CUDA_SHORT_VERSION <= 10
  type(psb_d_cuda_hybg_sparse_mat), target  :: ahybg
 #endif
  ! TODO HLG still to be done (complex)
  type(psb_d_cuda_hlg_sparse_mat), target   :: ahlg
  ! TODO HDIAG and DNSG do not even have CSMM
  type(psb_d_cuda_hdiag_sparse_mat), target   :: ahdiag
@ -663,7 +662,7 @@ program pdegenmm
  !call get_parms(ctxt,nrhs,acfmt,agfmt,idim,tnd)
  nrhs=2
  acfmt='CSR'
-  agfmt='CSRG'
+  agfmt='HLG'
  idim=2
  tnd=.false.
  call psb_init_timers()
@ -806,26 +805,26 @@ program pdegenmm
  x2 = b_mv_g%get_vect()
 !   ! TODO test AXPBY
-  call psb_geall(xg,desc_a,info)
+!   call psb_geall(xg,desc_a,info)
-  call psb_geasb(xg,desc_a,info,mold=tmold)
+!   call psb_geasb(xg,desc_a,info,mold=tmold)
-  call xg%set(done)
+!   call xg%set(done)
-  call xg%sync()
+!   call xg%sync()
-  call psb_geall(bg,desc_a,info)
+!   call psb_geall(bg,desc_a,info)
-  call psb_geasb(bg,desc_a,info,mold=tmold)
+!   call psb_geasb(bg,desc_a,info,mold=tmold)
-  !call bg%set(done+done)
+!   !call bg%set(done+done)
-
+
-!   ! TODO: spgpuDaxpby (axpbyMultiVecDeviceDouble) does not work
+! !   ! TODO: spgpuDaxpby (axpbyMultiVecDeviceDouble) does not work
-  call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
+!   call psb_geaxpby(done,xg,dzero,bg,desc_a,info)
-  call psb_cuda_DeviceSync()
+!   call psb_cuda_DeviceSync()
-
+
-  write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
+!   write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
-  call bg%sync()
+!   call bg%sync()
-  write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
+!   write(*,*) 'BG ', bg%is_dev(), bg%is_host(), bg%is_sync()
-  do i=1,8
+!   do i=1,8
-    write(*,*) bg%v%v(i)
+!     write(*,*) bg%v%v(i)
-  end do
+!   end do
-  return
+!   return
 !   call x_mv_g%set(done)
 !   call x_mv_g%sync()