Missing impl files
parent
173ffec2d3
commit
fe87ca52e3
@ -0,0 +1,171 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
#include "cintrf.h"
|
||||||
|
#include "dcsga.h"
|
||||||
|
|
||||||
|
|
||||||
|
int d_CSGADeviceFree(d_Cmat *Matrix)
|
||||||
|
{
|
||||||
|
d_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
|
||||||
|
if (cMat!=NULL) d_CSRGDeviceFree(cMat);
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz)
|
||||||
|
{
|
||||||
|
int rc=0;
|
||||||
|
d_CSRGDeviceMat *cMat;
|
||||||
|
|
||||||
|
if ((rc=d_CSRGDeviceAlloc(Matrix,nr,nc,nz))!=0)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
cMat = Matrix->mat;
|
||||||
|
if (nr <= 0) nr = 1;
|
||||||
|
|
||||||
|
if ((rc= allocRemoteBuffer(((void **) &(cMat->rowBlocks)), ((nr+1)*sizeof(int)))) != 0)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks){
|
||||||
|
rowBlocks[0] = 1;
|
||||||
|
int sum = 0, last_i= 1, ctr=1;
|
||||||
|
for(int i = 1; i < totalRows; i++){
|
||||||
|
sum += irp[i]-irp[i-1];
|
||||||
|
if(sum == MAX_NNZ_PER_WG){
|
||||||
|
last_i = i+1;
|
||||||
|
rowBlocks[ctr++] = i+1;
|
||||||
|
sum = 0;
|
||||||
|
|
||||||
|
}
|
||||||
|
else if( sum > MAX_NNZ_PER_WG){
|
||||||
|
if(i - last_i > 1){
|
||||||
|
rowBlocks[ctr++] = i-1 +1;
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
else if(i - last_i == 1){
|
||||||
|
rowBlocks[ctr++] = i +1;
|
||||||
|
}
|
||||||
|
last_i = i+1;
|
||||||
|
sum = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//printf("%d %d\n",ctr,totalRows);
|
||||||
|
*numBlocks = ctr;
|
||||||
|
rowBlocks[ctr++] = totalRows;
|
||||||
|
return ;
|
||||||
|
}
|
||||||
|
|
||||||
|
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||||
|
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks)
|
||||||
|
{
|
||||||
|
int rc=0;
|
||||||
|
d_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
|
||||||
|
if (cMat!=NULL) {
|
||||||
|
if ((rc=d_CSRGHost2Device(Matrix,nr,nc,nz,irp,ja,val))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
cMat->numBlocks = numBlocks;
|
||||||
|
// fprintf(stderr," CSGAH2D: %d (%d:%d) %p\n",numBlocks,
|
||||||
|
// rowBlocks[0],rowBlocks[1],cMat->rowBlocks);
|
||||||
|
if ((rc=writeRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
|
||||||
|
(numBlocks+1)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
//fprintf(stderr," CSGAH2D ok\n");
|
||||||
|
} else {
|
||||||
|
return(-1);
|
||||||
|
}
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||||
|
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks)
|
||||||
|
{
|
||||||
|
int rc=0;
|
||||||
|
d_CSRGDeviceMat *cMat= Matrix->mat;
|
||||||
|
|
||||||
|
if (cMat!=NULL) {
|
||||||
|
if ((rc=d_CSRGDevice2Host(Matrix,nr,nc,nz,irp,ja,val))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
*numBlocks = cMat->numBlocks ;
|
||||||
|
if ((rc=readRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
|
||||||
|
((*numBlocks)+1)*sizeof(int)))
|
||||||
|
!= SPGPU_SUCCESS)
|
||||||
|
return(rc);
|
||||||
|
|
||||||
|
}
|
||||||
|
return(CUSPARSE_STATUS_SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY, int *rb)
|
||||||
|
{
|
||||||
|
d_CSRGDeviceMat *devMat = (d_CSRGDeviceMat *) Matrix->mat;
|
||||||
|
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
|
||||||
|
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
|
||||||
|
spgpuHandle_t handle=psb_cudaGetHandle();
|
||||||
|
int indexBase=1;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
fprintf(stderr,"devMat %p m %d n %d nB %d rB %p x %p y %p \n",devMat,
|
||||||
|
devMat->m,devMat->n,devMat->numBlocks,devMat->rowBlocks,x->v_,y->v_);
|
||||||
|
fprintf(stderr,"x_count %d y_count %d xsize %d ysize %d \n",x->count_,y->count_,
|
||||||
|
x->size_,y->size_);
|
||||||
|
#endif
|
||||||
|
#if 0&&defined(VERBOSE)
|
||||||
|
__assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of vectors");
|
||||||
|
__assert(x->size_ >= devMat->n, "ERROR: x vector's size is not >= to matrix size (columns)");
|
||||||
|
__assert(y->size_ >= devMat->m, "ERROR: y vector's size is not >= to matrix size (rows)");
|
||||||
|
#endif
|
||||||
|
//fprintf(stderr,"Calling dCSGAMV \n");
|
||||||
|
dCSGAMV(handle, beta,(double *)y->v_, alpha,
|
||||||
|
(double *)devMat->val, devMat->ja, devMat->irp,
|
||||||
|
devMat->m,devMat->n,y->count_, devMat->numBlocks, devMat->rowBlocks,
|
||||||
|
(double *)x->v_, 1, rb);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@ -0,0 +1,65 @@
|
|||||||
|
/* Parallel Sparse BLAS GPU plugin */
|
||||||
|
/* (C) Copyright 2013 */
|
||||||
|
|
||||||
|
/* Salvatore Filippone */
|
||||||
|
/* Alessandro Fanfarillo */
|
||||||
|
|
||||||
|
/* Redistribution and use in source and binary forms, with or without */
|
||||||
|
/* modification, are permitted provided that the following conditions */
|
||||||
|
/* are met: */
|
||||||
|
/* 1. Redistributions of source code must retain the above copyright */
|
||||||
|
/* notice, this list of conditions and the following disclaimer. */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||||
|
/* notice, this list of conditions, and the following disclaimer in the */
|
||||||
|
/* documentation and/or other materials provided with the distribution. */
|
||||||
|
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||||
|
/* not be used to endorse or promote products derived from this */
|
||||||
|
/* software without specific written permission. */
|
||||||
|
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||||
|
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||||
|
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||||
|
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||||
|
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||||
|
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||||
|
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||||
|
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||||
|
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||||
|
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
|
||||||
|
#ifndef DCSGA_
|
||||||
|
#define DCSGA_
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cusparse_v2.h>
|
||||||
|
#include "cintrf.h"
|
||||||
|
|
||||||
|
#include "dcusparse.h"
|
||||||
|
#include "fcusparse.h"
|
||||||
|
#include "fcusparse_dat.h"
|
||||||
|
|
||||||
|
#define MAX_NNZ_PER_WG 4096
|
||||||
|
|
||||||
|
int d_CSGADeviceFree(d_Cmat *Matrix);
|
||||||
|
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz);
|
||||||
|
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks);
|
||||||
|
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||||
|
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks);
|
||||||
|
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||||
|
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks);
|
||||||
|
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
|
||||||
|
double beta, void* deviceY, int *rb);
|
||||||
|
|
||||||
|
int dCSGAMV(spgpuHandle_t handle, double beta, double* y, double alpha,
|
||||||
|
const double* as, const int* ja, const int* irp,
|
||||||
|
int m, int n, int ncol, int numBlocks,
|
||||||
|
const int* rowBlocks, const double *x,
|
||||||
|
int baseIndex, int *rb);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
@ -0,0 +1,68 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_d_cuda_csga_from_gpu(a,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_from_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, n, nz
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if (.not.(c_associated(a%deviceMat%mat))) then
|
||||||
|
call a%free()
|
||||||
|
return
|
||||||
|
end if
|
||||||
|
|
||||||
|
info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
|
||||||
|
if (info /= psb_success_) return
|
||||||
|
|
||||||
|
if (info == 0) call psb_realloc(m+1,a%irp,info)
|
||||||
|
if (info == 0) call psb_realloc(nz,a%ja,info)
|
||||||
|
if (info == 0) call psb_realloc(nz,a%val,info)
|
||||||
|
if (info == 0) call psb_realloc(m+1,a%rowBlocks,info)
|
||||||
|
if (info == 0) info = &
|
||||||
|
& CSGADevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,a%numBlocks,a%rowBlocks)
|
||||||
|
#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
|
||||||
|
a%irp(:) = a%irp(:)+1
|
||||||
|
a%ja(:) = a%ja(:)+1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
end subroutine psb_d_cuda_csga_from_gpu
|
||||||
@ -0,0 +1,65 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_d_cuda_csga_mold(a,b,info)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_mold
|
||||||
|
implicit none
|
||||||
|
class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
|
||||||
|
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='csga_mold'
|
||||||
|
logical, parameter :: debug=.false.
|
||||||
|
|
||||||
|
call psb_get_erraction(err_act)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
if (allocated(b)) then
|
||||||
|
call b%free()
|
||||||
|
deallocate(b,stat=info)
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(psb_d_cuda_csga_sparse_mat :: b, stat=info)
|
||||||
|
|
||||||
|
if (info /= psb_success_) then
|
||||||
|
info = psb_err_alloc_dealloc_
|
||||||
|
call psb_errpush(info, name)
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
end subroutine psb_d_cuda_csga_mold
|
||||||
@ -0,0 +1,139 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
subroutine psb_d_cuda_csga_to_gpu(a,info,nzrm)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use d_csga_mod
|
||||||
|
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_to_gpu
|
||||||
|
implicit none
|
||||||
|
class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
integer(psb_ipk_), intent(in), optional :: nzrm
|
||||||
|
|
||||||
|
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz
|
||||||
|
integer(psb_ipk_) :: nzdi,i,j,k,nrz
|
||||||
|
integer(psb_ipk_), allocatable :: irpdi(:),jadi(:),rbi(:)
|
||||||
|
real(psb_dpk_), allocatable :: valdi(:)
|
||||||
|
|
||||||
|
info = 0
|
||||||
|
|
||||||
|
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
|
||||||
|
|
||||||
|
m = a%get_nrows()
|
||||||
|
n = a%get_ncols()
|
||||||
|
nz = a%get_nzeros()
|
||||||
|
if (c_associated(a%deviceMat%Mat)) then
|
||||||
|
info = CSGADeviceFree(a%deviceMat)
|
||||||
|
end if
|
||||||
|
call psb_realloc(m+1,a%rowBlocks,info)
|
||||||
|
if (a%is_unit()) then
|
||||||
|
!
|
||||||
|
! CUSPARSE has the habit of storing the diagonal and then ignoring,
|
||||||
|
! whereas we do not store it. Hence this adapter code.
|
||||||
|
!
|
||||||
|
nzdi = nz + m
|
||||||
|
if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nzdi)
|
||||||
|
if (info == 0) then
|
||||||
|
if (a%is_unit()) then
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
!!! We are explicitly adding the diagonal
|
||||||
|
if ((info == 0) .and. a%is_triangle()) then
|
||||||
|
if ((info == 0).and.a%is_upper()) then
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
|
||||||
|
else
|
||||||
|
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
|
||||||
|
if (info == 0) then
|
||||||
|
irpdi(1) = 1
|
||||||
|
if (a%is_triangle().and.a%is_upper()) then
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
jadi(j) = i
|
||||||
|
valdi(j) = done
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
else
|
||||||
|
do i=1,m
|
||||||
|
j = irpdi(i)
|
||||||
|
nrz = a%irp(i+1)-a%irp(i)
|
||||||
|
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
|
||||||
|
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
|
||||||
|
jadi(j+nrz) = i
|
||||||
|
valdi(j+nrz) = done
|
||||||
|
irpdi(i+1) = j + nrz + 1
|
||||||
|
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
|
||||||
|
end do
|
||||||
|
end if
|
||||||
|
end if
|
||||||
|
call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
|
||||||
|
a%irp(:) = a%irp(:) -1
|
||||||
|
a%ja(:) = a%ja(:) -1
|
||||||
|
a%rowBlocks(:) = a%rowBlocks(:) -1
|
||||||
|
if (info == 0) info = CSGAHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi,&
|
||||||
|
& a%numBlocks,a%rowBlocks)
|
||||||
|
a%irp(:) = a%irp(:) +1
|
||||||
|
a%ja(:) = a%ja(:) +1
|
||||||
|
a%rowBlocks(:) = a%rowBlocks(:) +1
|
||||||
|
|
||||||
|
else
|
||||||
|
if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nz)
|
||||||
|
call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
|
||||||
|
!!$ write(0,*) 'to_gpu: ',a%numBlocks,':',&
|
||||||
|
!!$ & a%rowBlocks(1:2),a%rowBlocks(a%numBlocks:a%numBlocks+1)
|
||||||
|
a%irp(:) = a%irp(:) -1
|
||||||
|
a%ja(:) = a%ja(:) -1
|
||||||
|
a%rowBlocks(:) = a%rowBlocks(:) -1
|
||||||
|
if (info == 0) info = CSGAHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,&
|
||||||
|
& a%numBlocks,a%rowBlocks)
|
||||||
|
a%irp(:) = a%irp(:) +1
|
||||||
|
a%ja(:) = a%ja(:) +1
|
||||||
|
a%rowBlocks(:) = a%rowBlocks(:) +1
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
call a%set_sync()
|
||||||
|
|
||||||
|
if (info /= 0) then
|
||||||
|
write(0,*) 'Error in CSGA_TO_GPU ',info
|
||||||
|
end if
|
||||||
|
|
||||||
|
end subroutine psb_d_cuda_csga_to_gpu
|
||||||
@ -0,0 +1,119 @@
|
|||||||
|
! Parallel Sparse BLAS GPU plugin
|
||||||
|
! (C) Copyright 2013
|
||||||
|
!
|
||||||
|
! Salvatore Filippone
|
||||||
|
! Alessandro Fanfarillo
|
||||||
|
!
|
||||||
|
! Redistribution and use in source and binary forms, with or without
|
||||||
|
! modification, are permitted provided that the following conditions
|
||||||
|
! are met:
|
||||||
|
! 1. Redistributions of source code must retain the above copyright
|
||||||
|
! notice, this list of conditions and the following disclaimer.
|
||||||
|
! 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
! notice, this list of conditions, and the following disclaimer in the
|
||||||
|
! documentation and/or other materials provided with the distribution.
|
||||||
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||||
|
! not be used to endorse or promote products derived from this
|
||||||
|
! software without specific written permission.
|
||||||
|
!
|
||||||
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||||
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
! POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
!
|
||||||
|
|
||||||
|
|
||||||
|
subroutine psb_d_cuda_csga_vect_mv(alpha,a,x,beta,y,info,trans)
|
||||||
|
|
||||||
|
use psb_base_mod
|
||||||
|
use cusparse_mod
|
||||||
|
use elldev_mod
|
||||||
|
use psb_vectordev_mod
|
||||||
|
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_vect_mv
|
||||||
|
use psb_d_cuda_vect_mod
|
||||||
|
implicit none
|
||||||
|
class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
|
||||||
|
real(psb_dpk_), intent(in) :: alpha, beta
|
||||||
|
class(psb_d_base_vect_type), intent(inout) :: x
|
||||||
|
class(psb_d_base_vect_type), intent(inout) :: y
|
||||||
|
integer(psb_ipk_), intent(out) :: info
|
||||||
|
character, optional, intent(in) :: trans
|
||||||
|
|
||||||
|
real(psb_dpk_), allocatable :: rx(:), ry(:)
|
||||||
|
logical :: tra
|
||||||
|
character :: trans_
|
||||||
|
Integer(Psb_ipk_) :: err_act
|
||||||
|
character(len=20) :: name='d_cuda_csga_vect_mv'
|
||||||
|
|
||||||
|
call psb_erractionsave(err_act)
|
||||||
|
info = psb_success_
|
||||||
|
if (present(trans)) then
|
||||||
|
trans_ = trans
|
||||||
|
else
|
||||||
|
trans_ = 'N'
|
||||||
|
end if
|
||||||
|
|
||||||
|
if (.not.a%is_asb()) then
|
||||||
|
info = psb_err_invalid_mat_state_
|
||||||
|
call psb_errpush(info,name)
|
||||||
|
goto 9999
|
||||||
|
endif
|
||||||
|
|
||||||
|
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||||
|
|
||||||
|
if (tra) then
|
||||||
|
if (.not.x%is_host()) call x%sync()
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (.not.y%is_host()) call y%sync()
|
||||||
|
end if
|
||||||
|
call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||||
|
call y%set_host()
|
||||||
|
else
|
||||||
|
if (a%is_host()) call a%sync()
|
||||||
|
select type (xx => x)
|
||||||
|
type is (psb_d_vect_cuda)
|
||||||
|
select type(yy => y)
|
||||||
|
type is (psb_d_vect_cuda)
|
||||||
|
if (xx%is_host()) call xx%sync()
|
||||||
|
if (beta /= dzero) then
|
||||||
|
if (yy%is_host()) call yy%sync()
|
||||||
|
end if
|
||||||
|
info = spmvCSGADevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
& beta,yy%deviceVect,a%rowBlocks)
|
||||||
|
!!$ info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
|
||||||
|
!!$ & beta,yy%deviceVect)
|
||||||
|
if (info /= 0) then
|
||||||
|
call psb_errpush(psb_err_from_subroutine_ai_,name,&
|
||||||
|
& a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
|
||||||
|
info = psb_err_from_subroutine_ai_
|
||||||
|
goto 9999
|
||||||
|
end if
|
||||||
|
call yy%set_dev()
|
||||||
|
class default
|
||||||
|
rx = xx%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
class default
|
||||||
|
rx = x%get_vect()
|
||||||
|
ry = y%get_vect()
|
||||||
|
call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
|
||||||
|
call y%bld(ry)
|
||||||
|
end select
|
||||||
|
end if
|
||||||
|
if (info /= 0) goto 9999
|
||||||
|
call psb_erractionrestore(err_act)
|
||||||
|
return
|
||||||
|
|
||||||
|
9999 call psb_error_handler(err_act)
|
||||||
|
|
||||||
|
return
|
||||||
|
end subroutine psb_d_cuda_csga_vect_mv
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
/*
|
||||||
|
* spGPU - Sparse matrices on GPU library.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 - 2013
|
||||||
|
* Davide Barbieri - University of Rome Tor Vergata
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* version 3 as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "core.h"
|
||||||
|
#include "cuComplex.h"
|
||||||
|
|
||||||
|
/** \addtogroup diaFun DIA/HDIA Format
|
||||||
|
* @{
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int dCSGAMV(spgpuHandle_t handle,
|
||||||
|
double beta,
|
||||||
|
double* y,
|
||||||
|
double alpha,
|
||||||
|
const double* as,
|
||||||
|
const int* ja,
|
||||||
|
const int* irp,
|
||||||
|
int m,
|
||||||
|
int n,
|
||||||
|
int ncol,
|
||||||
|
int numBlocks,
|
||||||
|
const int* rowBlocks,
|
||||||
|
const double *x,
|
||||||
|
int baseIndex,
|
||||||
|
int *rb);
|
||||||
|
|
||||||
|
/** @}*/
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
Loading…
Reference in New Issue