From fe87ca52e3a43f6ed30ac65500a721e91bc6e9c3 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 27 May 2024 07:15:00 -0400 Subject: [PATCH] Missing impl files --- cuda/dcsga.c | 171 +++++++++++++++++++++++++ cuda/dcsga.h | 65 ++++++++++ cuda/impl/psb_d_cuda_csga_from_gpu.F90 | 68 ++++++++++ cuda/impl/psb_d_cuda_csga_mold.F90 | 65 ++++++++++ cuda/impl/psb_d_cuda_csga_to_gpu.F90 | 139 ++++++++++++++++++++ cuda/impl/psb_d_cuda_csga_vect_mv.F90 | 119 +++++++++++++++++ cuda/spgpu/csga.h | 49 +++++++ 7 files changed, 676 insertions(+) create mode 100644 cuda/dcsga.c create mode 100644 cuda/dcsga.h create mode 100644 cuda/impl/psb_d_cuda_csga_from_gpu.F90 create mode 100644 cuda/impl/psb_d_cuda_csga_mold.F90 create mode 100644 cuda/impl/psb_d_cuda_csga_to_gpu.F90 create mode 100644 cuda/impl/psb_d_cuda_csga_vect_mv.F90 create mode 100644 cuda/spgpu/csga.h diff --git a/cuda/dcsga.c b/cuda/dcsga.c new file mode 100644 index 00000000..27747740 --- /dev/null +++ b/cuda/dcsga.c @@ -0,0 +1,171 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. 
*/ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include +#include "cintrf.h" +#include "dcsga.h" + + +int d_CSGADeviceFree(d_Cmat *Matrix) +{ + d_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) d_CSRGDeviceFree(cMat); + return(CUSPARSE_STATUS_SUCCESS); +} + + +int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz) +{ + int rc=0; + d_CSRGDeviceMat *cMat; + + if ((rc=d_CSRGDeviceAlloc(Matrix,nr,nc,nz))!=0) + return(rc); + + cMat = Matrix->mat; + if (nr <= 0) nr = 1; + + if ((rc= allocRemoteBuffer(((void **) &(cMat->rowBlocks)), ((nr+1)*sizeof(int)))) != 0) + return(rc); + + return(CUSPARSE_STATUS_SUCCESS); +} + +void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks){ + rowBlocks[0] = 1; + int sum = 0, last_i= 1, ctr=1; + for(int i = 1; i < totalRows; i++){ + sum += irp[i]-irp[i-1]; + if(sum == MAX_NNZ_PER_WG){ + last_i = i+1; + rowBlocks[ctr++] = i+1; + sum = 0; + + } + else if( sum > MAX_NNZ_PER_WG){ + if(i - last_i > 1){ + rowBlocks[ctr++] = i-1 +1; + i--; + } + else if(i - last_i == 1){ + rowBlocks[ctr++] = i +1; + } + last_i = i+1; + sum = 0; + } + } + + //printf("%d %d\n",ctr,totalRows); + 
*numBlocks = ctr; + rowBlocks[ctr++] = totalRows; + return ; +} + +int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz, + int *irp, int *ja, double *val, int numBlocks, int *rowBlocks) +{ + int rc=0; + d_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + if ((rc=d_CSRGHost2Device(Matrix,nr,nc,nz,irp,ja,val)) + != SPGPU_SUCCESS) + return(rc); + cMat->numBlocks = numBlocks; + // fprintf(stderr," CSGAH2D: %d (%d:%d) %p\n",numBlocks, + // rowBlocks[0],rowBlocks[1],cMat->rowBlocks); + if ((rc=writeRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks, + (numBlocks+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + //fprintf(stderr," CSGAH2D ok\n"); + } else { + return(-1); + } + return(CUSPARSE_STATUS_SUCCESS); +} + +int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz, + int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks) +{ + int rc=0; + d_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + if ((rc=d_CSRGDevice2Host(Matrix,nr,nc,nz,irp,ja,val)) + != SPGPU_SUCCESS) + return(rc); + *numBlocks = cMat->numBlocks ; + if ((rc=readRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks, + ((*numBlocks)+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + } + return(CUSPARSE_STATUS_SUCCESS); +} + + +int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX, + double beta, void* deviceY, int *rb) +{ + d_CSRGDeviceMat *devMat = (d_CSRGDeviceMat *) Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + int indexBase=1; + +#if 0 + fprintf(stderr,"devMat %p m %d n %d nB %d rB %p x %p y %p \n",devMat, + devMat->m,devMat->n,devMat->numBlocks,devMat->rowBlocks,x->v_,y->v_); + fprintf(stderr,"x_count %d y_count %d xsize %d ysize %d \n",x->count_,y->count_, + x->size_,y->size_); +#endif +#if 0&&defined(VERBOSE) + __assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of 
vectors"); + __assert(x->size_ >= devMat->n, "ERROR: x vector's size is not >= to matrix size (columns)"); + __assert(y->size_ >= devMat->m, "ERROR: y vector's size is not >= to matrix size (rows)"); +#endif + //fprintf(stderr,"Calling dCSGAMV \n"); + dCSGAMV(handle, beta,(double *)y->v_, alpha, + (double *)devMat->val, devMat->ja, devMat->irp, + devMat->m,devMat->n,y->count_, devMat->numBlocks, devMat->rowBlocks, + (double *)x->v_, 1, rb); + + +} + diff --git a/cuda/dcsga.h b/cuda/dcsga.h new file mode 100644 index 00000000..21f82f19 --- /dev/null +++ b/cuda/dcsga.h @@ -0,0 +1,65 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
+ /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
+ /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
+ /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
+ /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
+ /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
+ /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
+ /* POSSIBILITY OF SUCH DAMAGE. */
+
+/* C-side interface of the double precision CSGA device matrix format. */
+#ifndef DCSGA_
+#define DCSGA_
+
+
+/* NOTE(review): the four bare #include lines below have lost their header */
+/* names in this copy of the patch; restore them from the upstream file. */
+#include
+#include
+
+#include
+#include
+#include "cintrf.h"
+
+#include "dcusparse.h"
+#include "fcusparse.h"
+#include "fcusparse_dat.h"
+
+/* Threshold on the accumulated non-zeros per row block, used by */
+/* d_CSGAComputeRowBlocks to decide when a block is closed. */
+#define MAX_NNZ_PER_WG 4096
+
+/* Free the device matrix held by Matrix (no-op when none is attached). */
+int d_CSGADeviceFree(d_Cmat *Matrix);
+/* Allocate the device matrix for nr x nc with nz entries, plus the */
+/* row-block table; returns 0 on success. */
+int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz);
+/* Fill rowBlocks/numBlocks from the row pointer irp; rowBlocks must be */
+/* provided by the caller. */
+void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks);
+/* Copy irp/ja/val and numBlocks+1 row-block entries host -> device. */
+int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
+                      int *irp, int *ja, double *val, int numBlocks, int *rowBlocks);
+/* Copy irp/ja/val and the row-block table device -> host; *numBlocks is */
+/* set from the device matrix. */
+int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
+                      int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks);
+/* Run the CSGA matrix-vector product on MultiVectDevice operands; rb is */
+/* forwarded unchanged to dCSGAMV. */
+int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
+                     double beta, void* deviceY, int *rb);
+
+/* Kernel launcher implemented elsewhere in the project. */
+/* NOTE(review): argument semantics (as/ja/irp are presumably the CSR */
+/* arrays, baseIndex the index base) are inferred from names -- confirm. */
+int dCSGAMV(spgpuHandle_t handle, double beta, double* y, double alpha,
+            const double* as, const int* ja, const int* irp,
+            int m, int n, int ncol, int numBlocks,
+            const int* rowBlocks, const double *x,
+            int baseIndex, int *rb);
+
+#endif
+
+diff --git a/cuda/impl/psb_d_cuda_csga_from_gpu.F90 b/cuda/impl/psb_d_cuda_csga_from_gpu.F90
new file mode 100644
index 00000000..e064730d
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_csga_from_gpu.F90
@@ -0,0 +1,68 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! 
modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_d_cuda_csga_from_gpu(a,info)
+  !
+  ! Refresh the host-side arrays of a CSGA matrix from its device copy.
+  !
+  !   a     matrix whose irp/ja/val/rowBlocks/numBlocks are overwritten
+  !   info  0 on success, otherwise the code of the first failing step
+  !
+  ! If no device matrix is attached the matrix is freed and info is 0.
+  ! On any failure the routine returns immediately, without touching the
+  ! arrays further and without marking the matrix as synchronized.
+  !
+
+  use psb_base_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_from_gpu
+  implicit none
+  class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+
+  integer(psb_ipk_) :: m, n, nz
+
+  info = 0
+
+  if (.not.(c_associated(a%deviceMat%mat))) then
+    call a%free()
+    return
+  end if
+
+  info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
+  if (info /= psb_success_) return
+
+  if (info == 0) call psb_realloc(m+1,a%irp,info)
+  if (info == 0) call psb_realloc(nz,a%ja,info)
+  if (info == 0) call psb_realloc(nz,a%val,info)
+  ! The device table always holds at least two entries (the C allocator
+  ! uses max(nr,1)+1), and numBlocks+1 entries are copied back.
+  if (info == 0) call psb_realloc(max(m,1_psb_ipk_)+1,a%rowBlocks,info)
+  if (info == 0) info = &
+       & CSGADevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,a%numBlocks,a%rowBlocks)
+  ! Do not shift or flag possibly unallocated / partially filled arrays.
+  if (info /= 0) return
+  ! NOTE(review): to_gpu shifts irp/ja by -1 unconditionally before the
+  ! upload, whereas the shift back below depends on the CUDA version;
+  ! confirm against d_CSRGHost2Device/d_CSRGDevice2Host.
+#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
+  a%irp(:) = a%irp(:)+1
+  a%ja(:) = a%ja(:)+1
+#endif
+  ! to_gpu uploads rowBlocks-1 and keeps the host copy unshifted; undo the
+  ! shift on the numBlocks+1 entries that were just read back.
+  a%rowBlocks(1:a%numBlocks+1) = a%rowBlocks(1:a%numBlocks+1)+1
+
+  call a%set_sync()
+
+end subroutine psb_d_cuda_csga_from_gpu
diff --git a/cuda/impl/psb_d_cuda_csga_mold.F90 b/cuda/impl/psb_d_cuda_csga_mold.F90
new file mode 100644
index 00000000..e77d6b3a
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_csga_mold.F90
@@ -0,0 +1,65 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! 
software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+
+
+subroutine psb_d_cuda_csga_mold(a,b,info)
+  !
+  ! Give b the dynamic type psb_d_cuda_csga_sparse_mat.
+  !
+  !   a     only selects this implementation; it is not read
+  !   b     freed and deallocated if already allocated, then allocated
+  !         with the CSGA type
+  !   info  0 on success, psb_err_alloc_dealloc_ if the deallocate or the
+  !         allocate reported an error (the error is also pushed and the
+  !         error handler invoked)
+  !
+
+  use psb_base_mod
+  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_mold
+  implicit none
+  class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
+  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+  integer(psb_ipk_), intent(out) :: info
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='csga_mold'
+  logical, parameter :: debug=.false.
+
+  call psb_get_erraction(err_act)
+
+  info = 0
+  if (allocated(b)) then
+    call b%free()
+    deallocate(b,stat=info)
+  end if
+  if (info == 0) allocate(psb_d_cuda_csga_sparse_mat :: b, stat=info)
+
+  ! Normal exit: nothing else to do.
+  if (info == psb_success_) return
+
+  ! Failure path: record the allocation error and run the handler.
+  info = psb_err_alloc_dealloc_
+  call psb_errpush(info, name)
+  call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_d_cuda_csga_mold
diff --git a/cuda/impl/psb_d_cuda_csga_to_gpu.F90 b/cuda/impl/psb_d_cuda_csga_to_gpu.F90
new file mode 100644
index 00000000..e03b05f0
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_csga_to_gpu.F90
@@ -0,0 +1,139 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+! 
+! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+subroutine psb_d_cuda_csga_to_gpu(a,info,nzrm)
+  !
+  ! Build the device copy of a CSGA matrix from its host CSR arrays.
+  !
+  !   a     matrix to upload; nothing is done if a%val or a%ja is not
+  !         allocated
+  !   info  0 on success, otherwise the code of the first failing step
+  !   nzrm  optional, currently unused
+  !
+  ! Any existing device matrix is freed first. The row-block table is
+  ! computed on the host and uploaded shifted by -1; the host copy is
+  ! restored afterwards. The matrix is flagged as synchronized only when
+  ! every step succeeded.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use d_csga_mod
+  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_to_gpu
+  implicit none
+  class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
+  integer(psb_ipk_), intent(out) :: info
+  integer(psb_ipk_), intent(in), optional :: nzrm
+
+  integer(psb_ipk_) :: m, n, nz
+  integer(psb_ipk_) :: nzdi,i,j,nrz
+  integer(psb_ipk_), allocatable :: irpdi(:),jadi(:)
+  real(psb_dpk_), allocatable :: valdi(:)
+
+  info = 0
+
+  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
+
+  m = a%get_nrows()
+  n = a%get_ncols()
+  nz = a%get_nzeros()
+  if (c_associated(a%deviceMat%Mat)) then
+    info = CSGADeviceFree(a%deviceMat)
+  end if
+  ! Keep a failure from CSGADeviceFree instead of overwriting it, and size
+  ! the table like the device side does (at least two entries).
+  if (info == 0) call psb_realloc(max(m,1_psb_ipk_)+1,a%rowBlocks,info)
+  if (a%is_unit()) then
+    !
+    ! CUSPARSE has the habit of storing the diagonal and then ignoring,
+    ! whereas we do not store it. Hence this adapter code.
+    !
+    nzdi = nz + m
+    if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nzdi)
+    ! We are inside the is_unit() branch, so the diagonal type is unit.
+    if (info == 0) info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
+    !!! We are explicitly adding the diagonal
+    if ((info == 0) .and. a%is_triangle()) then
+      if (a%is_upper()) then
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
+      else
+        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
+      end if
+    end if
+    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
+    if (info == 0) then
+      irpdi(1) = 1
+      if (a%is_triangle().and.a%is_upper()) then
+        ! Upper triangle: the unit diagonal entry goes first in each row.
+        do i=1,m
+          j = irpdi(i)
+          jadi(j) = i
+          valdi(j) = done
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      else
+        ! Otherwise the unit diagonal entry is appended after the row.
+        do i=1,m
+          j = irpdi(i)
+          nrz = a%irp(i+1)-a%irp(i)
+          jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
+          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
+          jadi(j+nrz) = i
+          valdi(j+nrz) = done
+          irpdi(i+1) = j + nrz + 1
+          ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
+        end do
+      end if
+    end if
+    if (info == 0) then
+      ! NOTE(review): the row blocks are computed from a%irp (without the
+      ! added diagonal) while irpdi/jadi are what gets uploaded, and they
+      ! are uploaded unshifted whereas the other branch uploads irp/ja
+      ! shifted by -1; confirm both against the kernel.
+      call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
+      ! a%irp/a%ja are not passed to the upload here, so only rowBlocks
+      ! needs the temporary shift.
+      a%rowBlocks(:) = a%rowBlocks(:) -1
+      info = CSGAHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi,&
+           & a%numBlocks,a%rowBlocks)
+      a%rowBlocks(:) = a%rowBlocks(:) +1
+    end if
+
+  else
+    if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nz)
+    if (info == 0) then
+      call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
+!!$ write(0,*) 'to_gpu: ',a%numBlocks,':',&
+!!$ & a%rowBlocks(1:2),a%rowBlocks(a%numBlocks:a%numBlocks+1)
+      ! Upload shifted by -1, then restore the host arrays.
+      a%irp(:) = a%irp(:) -1
+      a%ja(:) = a%ja(:) -1
+      a%rowBlocks(:) = a%rowBlocks(:) -1
+      info = CSGAHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,&
+           & a%numBlocks,a%rowBlocks)
+      a%irp(:) = a%irp(:) +1
+      a%ja(:) = a%ja(:) +1
+      a%rowBlocks(:) = a%rowBlocks(:) +1
+    end if
+
+  endif
+
+  if (info == 0) then
+    call a%set_sync()
+  else
+    write(0,*) 'Error in CSGA_TO_GPU ',info
+  end if
+
+end subroutine psb_d_cuda_csga_to_gpu
diff --git a/cuda/impl/psb_d_cuda_csga_vect_mv.F90 b/cuda/impl/psb_d_cuda_csga_vect_mv.F90
new file mode 100644
index 00000000..fbfe3c1b
--- /dev/null
+++ b/cuda/impl/psb_d_cuda_csga_vect_mv.F90
@@ -0,0 +1,119 @@
+! Parallel Sparse BLAS GPU plugin
+! (C) Copyright 2013
+!
+! Salvatore Filippone
+! Alessandro Fanfarillo
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! 
notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! 
+
+
+subroutine psb_d_cuda_csga_vect_mv(alpha,a,x,beta,y,info,trans)
+  !
+  ! Matrix-vector product with a CSGA matrix on encapsulated vectors
+  ! (presumably y := alpha*op(A)*x + beta*y -- confirm against the base
+  ! class contract).
+  !
+  !   alpha, beta  scalars
+  !   a            assembled CSGA matrix (error if not assembled)
+  !   x, y         input / output vectors
+  !   info         psb_success_ or an error code
+  !   trans        optional; 'T' or 'C' selects the host CSR product,
+  !                anything else (default 'N') the device path
+  !
+  ! The device kernel is used only when both x and y are psb_d_vect_cuda;
+  ! otherwise the product falls back to the host CSR implementation on
+  ! plain arrays and y is rebuilt from the result.
+  !
+
+  use psb_base_mod
+  use cusparse_mod
+  use elldev_mod
+  use psb_vectordev_mod
+  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_vect_mv
+  use psb_d_cuda_vect_mod
+  implicit none
+  class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
+  real(psb_dpk_), intent(in) :: alpha, beta
+  class(psb_d_base_vect_type), intent(inout) :: x
+  class(psb_d_base_vect_type), intent(inout) :: y
+  integer(psb_ipk_), intent(out) :: info
+  character, optional, intent(in) :: trans
+
+  real(psb_dpk_), allocatable :: rx(:), ry(:)
+  logical :: tra
+  character :: trans_
+  Integer(Psb_ipk_) :: err_act
+  character(len=20) :: name='d_cuda_csga_vect_mv'
+
+  call psb_erractionsave(err_act)
+  info = psb_success_
+  if (present(trans)) then
+    trans_ = trans
+  else
+    trans_ = 'N'
+  end if
+
+  if (.not.a%is_asb()) then
+    info = psb_err_invalid_mat_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
+
+  if (tra) then
+    ! Transposed product: run on the host, so bring the operands there.
+    if (.not.x%is_host()) call x%sync()
+    if (beta /= dzero) then
+      if (.not.y%is_host()) call y%sync()
+    end if
+    call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
+    call y%set_host()
+  else
+    if (a%is_host()) call a%sync()
+    select type (xx => x)
+    type is (psb_d_vect_cuda)
+      select type(yy => y)
+      type is (psb_d_vect_cuda)
+        if (xx%is_host()) call xx%sync()
+        ! y only needs to be current on the device when it is read.
+        if (beta /= dzero) then
+          if (yy%is_host()) call yy%sync()
+        end if
+        info = spmvCSGADevice(a%deviceMat,alpha,xx%deviceVect,&
+             & beta,yy%deviceVect,a%rowBlocks)
+!!$ info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
+!!$ & beta,yy%deviceVect)
+        if (info /= 0) then
+          ! Report the routine that actually failed.
+          call psb_errpush(psb_err_from_subroutine_ai_,name,&
+               & a_err='spmvCSGADevice',i_err=(/info,izero,izero,izero,izero/))
+          info = psb_err_from_subroutine_ai_
+          goto 9999
+        end if
+        call yy%set_dev()
+      class default
+        rx = xx%get_vect()
+        ry = y%get_vect()
+        call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+        call y%bld(ry)
+      end select
+    class default
+      rx = x%get_vect()
+      ry = y%get_vect()
+      call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
+      call y%bld(ry)
+    end select
+  end if
+  if (info /= 0) goto 9999
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+end subroutine psb_d_cuda_csga_vect_mv
diff --git a/cuda/spgpu/csga.h b/cuda/spgpu/csga.h
new file mode 100644
index 00000000..d9d57f00
--- /dev/null
+++ b/cuda/spgpu/csga.h
@@ -0,0 +1,49 @@
+#pragma once
+
+/*
+ * spGPU - Sparse matrices on GPU library.
+ *
+ * Copyright (C) 2010 - 2013
+ * Davide Barbieri - University of Rome Tor Vergata
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 3 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "core.h"
+#include "cuComplex.h"
+
+/** \addtogroup csgaFun CSGA Format
+ * @{
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Double precision CSGA matrix-vector product launcher.
+ * NOTE(review): argument semantics are not visible from this header;
+ * as/ja/irp are presumably the CSR arrays and baseIndex their index base.
+ */
+int dCSGAMV(spgpuHandle_t handle,
+            double beta,
+            double* y,
+            double alpha,
+            const double* as,
+            const int* ja,
+            const int* irp,
+            int m,
+            int n,
+            int ncol,
+            int numBlocks,
+            const int* rowBlocks,
+            const double *x,
+            int baseIndex,
+            int *rb);
+
+/** @}*/
+#ifdef __cplusplus
+}
+#endif