Missing impl files
parent
173ffec2d3
commit
fe87ca52e3
@ -0,0 +1,171 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusparse_v2.h>
|
||||
#include "cintrf.h"
|
||||
#include "dcsga.h"
|
||||
|
||||
|
||||
/*
 * Release the device-side storage attached to a CSGA matrix handle.
 *
 * Matrix : handle whose ->mat field points at the device matrix descriptor.
 *
 * Returns CUSPARSE_STATUS_SUCCESS when there is nothing to release,
 * otherwise the status reported by d_CSRGDeviceFree.
 *
 * NOTE(review): the rowBlocks buffer obtained in d_CSGADeviceAlloc is not
 * released explicitly here; confirm whether d_CSRGDeviceFree covers it or
 * whether a dedicated release call is needed before the descriptor goes away.
 */
int d_CSGADeviceFree(d_Cmat *Matrix)
{
  /* Nothing to do for a missing handle or a handle with no device matrix. */
  if ((Matrix == NULL) || (Matrix->mat == NULL))
    return(CUSPARSE_STATUS_SUCCESS);

  /*
   * The other CSRG helpers used in this file (d_CSRGDeviceAlloc,
   * d_CSRGHost2Device, d_CSRGDevice2Host) all take the d_Cmat handle,
   * so the handle, not the inner descriptor, is passed here as well.
   */
  return(d_CSRGDeviceFree(Matrix));
}
|
||||
|
||||
|
||||
/*
 * Allocate the device matrix for a CSGA handle.
 *
 * First delegates to d_CSRGDeviceAlloc for the base storage, then
 * requests a device buffer of (nr+1) ints for the row-block table,
 * treating a non-positive nr as 1.
 *
 * Returns the first non-zero status reported by either step, or
 * CUSPARSE_STATUS_SUCCESS when both succeed.
 */
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz)
{
  d_CSRGDeviceMat *devMat;
  int blockRows;
  int status;

  status = d_CSRGDeviceAlloc(Matrix,nr,nc,nz);
  if (status != 0) {
    return(status);
  }

  devMat = Matrix->mat;

  /* Keep the row-block table at least two entries long. */
  blockRows = nr;
  if (blockRows <= 0) {
    blockRows = 1;
  }

  status = allocRemoteBuffer(((void **) &(devMat->rowBlocks)),
                             ((blockRows+1)*sizeof(int)));
  if (status != 0) {
    return(status);
  }

  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
#ifndef MAX_NNZ_PER_WG
/* Fallback so this routine stands on its own; dcsga.h is expected to define it. */
#define MAX_NNZ_PER_WG 4096
#endif

/*
 * Split the rows of a CSR matrix into consecutive row blocks.
 *
 * totalRows : number of rows; the loop below visits i = 1 .. totalRows-1.
 * irp       : CSR row pointer; only differences irp[i]-irp[i-1] are used,
 *             so the index base of its contents does not matter here.
 * numBlocks : (out) number of blocks, i.e. number of entries written to
 *             rowBlocks minus one.
 * rowBlocks : (out) block boundaries, stored 1-based. rowBlocks[0] is 1 and
 *             the last entry written is totalRows. The caller must provide
 *             room for max(totalRows,1)+1 entries.
 *
 * A block is closed as soon as its accumulated nonzero count reaches
 * MAX_NNZ_PER_WG. When adding a row would push a multi-row block past the
 * limit, the block is closed *before* that row and the row is revisited as
 * the start of the next block. A row that exceeds the limit on its own
 * becomes a single-row block.
 *
 * NOTE(review): the scan stops at totalRows-1 and the closing boundary is
 * totalRows, exactly as in the previous version; confirm against the kernel
 * how the last row is expected to be covered.
 */
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks){
  int sum = 0;         /* nonzeros accumulated in the block being built   */
  int blockStart = 1;  /* 1-based first row of the block being built      */
  int ctr = 1;         /* next free slot in rowBlocks                     */

  rowBlocks[0] = 1;

  for (int i = 1; i < totalRows; i++) {
    sum += irp[i] - irp[i-1];

    if (sum == MAX_NNZ_PER_WG) {
      /* Rows blockStart..i fill the block exactly. */
      rowBlocks[ctr++] = i + 1;
      blockStart = i + 1;
      sum = 0;
    } else if (sum > MAX_NNZ_PER_WG) {
      if (i > blockStart) {
        /* More than one row in the block: close it before row i and
           step back so that row i is examined again on its own.       */
        rowBlocks[ctr++] = i;
        i--;
      } else {
        /* Row i alone is over the limit: it forms a single-row block. */
        rowBlocks[ctr++] = i + 1;
      }
      blockStart = i + 1;
      sum = 0;
    }
  }

  *numBlocks = ctr;
  rowBlocks[ctr] = totalRows;
  return ;
}
|
||||
|
||||
/*
 * Copy a CSR matrix and its row-block table from host to device.
 *
 * The CSR arrays are handed to d_CSRGHost2Device; afterwards numBlocks is
 * recorded in the device descriptor and (numBlocks+1) ints are written
 * from rowBlocks into the descriptor's rowBlocks buffer.
 *
 * Returns -1 when the handle carries no device matrix, the failing status
 * of either copy step, or CUSPARSE_STATUS_SUCCESS.
 */
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
                      int *irp, int *ja, double *val, int numBlocks, int *rowBlocks)
{
  d_CSRGDeviceMat *devMat = Matrix->mat;
  int status = 0;

  if (devMat == NULL) {
    return(-1);
  }

  /* Base CSR storage first. */
  status = d_CSRGHost2Device(Matrix,nr,nc,nz,irp,ja,val);
  if (status != SPGPU_SUCCESS) {
    return(status);
  }

  /* Then the row-block table: numBlocks boundaries plus the closing one. */
  devMat->numBlocks = numBlocks;
  status = writeRemoteBuffer((void *) rowBlocks,(void *) devMat->rowBlocks,
                             (numBlocks+1)*sizeof(int));
  if (status != SPGPU_SUCCESS) {
    return(status);
  }

  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
/*
 * Copy a CSR matrix and its row-block table from device to host.
 *
 * The CSR arrays are fetched through d_CSRGDevice2Host; then *numBlocks is
 * read from the device descriptor and (*numBlocks+1) ints are read from the
 * descriptor's rowBlocks buffer into rowBlocks.
 *
 * Returns -1 when the handle carries no device matrix (nothing is written
 * to the output arguments in that case), the failing status of either copy
 * step, or CUSPARSE_STATUS_SUCCESS. The -1 on a missing device matrix
 * mirrors d_CSGAHost2Device; previously this case reported success while
 * leaving every output untouched.
 */
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
                      int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks)
{
  int rc=0;
  d_CSRGDeviceMat *cMat= Matrix->mat;

  if (cMat == NULL)
    return(-1);

  if ((rc=d_CSRGDevice2Host(Matrix,nr,nc,nz,irp,ja,val))
      != SPGPU_SUCCESS)
    return(rc);

  /* The block count lives in the host-side descriptor. */
  *numBlocks = cMat->numBlocks ;
  if ((rc=readRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
                           ((*numBlocks)+1)*sizeof(int)))
      != SPGPU_SUCCESS)
    return(rc);

  return(CUSPARSE_STATUS_SUCCESS);
}
|
||||
|
||||
|
||||
/*
 * Sparse matrix-vector product through dCSGAMV.
 *
 * Matrix  : handle carrying the device matrix descriptor.
 * alpha   : scalar forwarded to dCSGAMV together with the matrix and x.
 * deviceX : pointer to a struct MultiVectDevice holding x.
 * beta    : scalar forwarded to dCSGAMV together with y.
 * deviceY : pointer to a struct MultiVectDevice holding y.
 * rb      : forwarded unchanged as the last argument of dCSGAMV.
 *
 * Returns -1 when the device matrix or either vector is missing, otherwise
 * the value returned by dCSGAMV. The function previously fell off the end
 * without a return statement, so callers tested an indeterminate value.
 */
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
                     double beta, void* deviceY, int *rb)
{
  d_CSRGDeviceMat *devMat = (d_CSRGDeviceMat *) Matrix->mat;
  struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
  struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
  spgpuHandle_t handle;
  const int indexBase=1;   /* passed to dCSGAMV as its baseIndex argument */

  if ((devMat == NULL) || (x == NULL) || (y == NULL))
    return(-1);

  handle = psb_cudaGetHandle();

#if 0
  fprintf(stderr,"devMat %p m %d n %d nB %d rB %p x %p y %p \n",devMat,
          devMat->m,devMat->n,devMat->numBlocks,devMat->rowBlocks,x->v_,y->v_);
  fprintf(stderr,"x_count %d y_count %d xsize %d ysize %d \n",x->count_,y->count_,
          x->size_,y->size_);
#endif
#if 0&&defined(VERBOSE)
  __assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of vectors");
  __assert(x->size_ >= devMat->n, "ERROR: x vector's size is not >= to matrix size (columns)");
  __assert(y->size_ >= devMat->m, "ERROR: y vector's size is not >= to matrix size (rows)");
#endif
  //fprintf(stderr,"Calling dCSGAMV \n");
  return(dCSGAMV(handle, beta,(double *)y->v_, alpha,
                 (double *)devMat->val, devMat->ja, devMat->irp,
                 devMat->m,devMat->n,y->count_, devMat->numBlocks, devMat->rowBlocks,
                 (double *)x->v_, indexBase, rb));
}
|
||||
|
||||
@ -0,0 +1,65 @@
|
||||
/* Parallel Sparse BLAS GPU plugin */
|
||||
/* (C) Copyright 2013 */
|
||||
|
||||
/* Salvatore Filippone */
|
||||
/* Alessandro Fanfarillo */
|
||||
|
||||
/* Redistribution and use in source and binary forms, with or without */
|
||||
/* modification, are permitted provided that the following conditions */
|
||||
/* are met: */
|
||||
/* 1. Redistributions of source code must retain the above copyright */
|
||||
/* notice, this list of conditions and the following disclaimer. */
|
||||
/* 2. Redistributions in binary form must reproduce the above copyright */
|
||||
/* notice, this list of conditions, and the following disclaimer in the */
|
||||
/* documentation and/or other materials provided with the distribution. */
|
||||
/* 3. The name of the PSBLAS group or the names of its contributors may */
|
||||
/* not be used to endorse or promote products derived from this */
|
||||
/* software without specific written permission. */
|
||||
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
|
||||
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
|
||||
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
||||
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
|
||||
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
|
||||
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
|
||||
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
|
||||
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
|
||||
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
|
||||
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
|
||||
#ifndef DCSGA_
|
||||
#define DCSGA_
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusparse_v2.h>
|
||||
#include "cintrf.h"
|
||||
|
||||
#include "dcusparse.h"
|
||||
#include "fcusparse.h"
|
||||
#include "fcusparse_dat.h"
|
||||
|
||||
#define MAX_NNZ_PER_WG 4096
|
||||
|
||||
int d_CSGADeviceFree(d_Cmat *Matrix);
|
||||
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz);
|
||||
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks);
|
||||
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks);
|
||||
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
|
||||
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks);
|
||||
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
|
||||
double beta, void* deviceY, int *rb);
|
||||
|
||||
int dCSGAMV(spgpuHandle_t handle, double beta, double* y, double alpha,
|
||||
const double* as, const int* ja, const int* irp,
|
||||
int m, int n, int ncol, int numBlocks,
|
||||
const int* rowBlocks, const double *x,
|
||||
int baseIndex, int *rb);
|
||||
|
||||
#endif
|
||||
|
||||
@ -0,0 +1,68 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
!
! Copy a CSGA matrix from the device back into the host arrays of A.
!
!   a    : matrix whose irp/ja/val/rowBlocks/numBlocks are refreshed
!   info : 0 on success, otherwise the first failing status
!
! When no device matrix is attached the host matrix is freed and the
! routine returns with info = 0.
! On any failure the routine returns before touching the index arrays
! and without marking the matrix as synchronised.
!
subroutine psb_d_cuda_csga_from_gpu(a,info)

  use psb_base_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_from_gpu
  implicit none
  class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: m, n, nz, nrb

  info = 0

  if (.not.(c_associated(a%deviceMat%mat))) then
    call a%free()
    return
  end if

  info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
  if (info /= psb_success_) return

  call psb_realloc(m+1,a%irp,info)
  if (info == 0) call psb_realloc(nz,a%ja,info)
  if (info == 0) call psb_realloc(nz,a%val,info)
  ! The device-side table is allocated with at least two entries
  ! (a non-positive row count is treated as 1), so match that here.
  if (info == 0) call psb_realloc(max(m,1_psb_ipk_)+1,a%rowBlocks,info)
  if (info == 0) info = &
       & CSGADevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,a%numBlocks,a%rowBlocks)
  if (info /= 0) return

  ! NOTE(review): to_gpu shifts irp/ja by -1 unconditionally before the
  ! upload, whereas the +1 below depends on the CUDA version; confirm
  ! which of the two is intended.
#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
  a%irp(:) = a%irp(:)+1
  a%ja(:)  = a%ja(:)+1
#endif

  ! to_gpu uploads rowBlocks-1 and keeps the host copy 1-based, so the
  ! entries just read back have to be shifted to the same convention.
  nrb = a%numBlocks + 1
  if (nrb > size(a%rowBlocks)) nrb = size(a%rowBlocks)
  a%rowBlocks(1:nrb) = a%rowBlocks(1:nrb) + 1

  call a%set_sync()

end subroutine psb_d_cuda_csga_from_gpu
|
||||
@ -0,0 +1,65 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
!
! Make B an empty matrix of the same dynamic type as A
! (psb_d_cuda_csga_sparse_mat).
!
!   a    : template matrix; only its type is relevant here
!   b    : released if already allocated, then allocated afresh
!   info : 0 on success, psb_err_alloc_dealloc_ when the deallocation
!          or allocation reports a non-zero stat
!
subroutine psb_d_cuda_csga_mold(a,b,info)

  use psb_base_mod
  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_mold
  implicit none
  class(psb_d_cuda_csga_sparse_mat), intent(in)            :: a
  class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                           :: info
  integer(psb_ipk_)  :: err_act
  character(len=20)  :: name='csga_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0

  ! Drop whatever B currently holds before re-typing it.
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if

  if (info == 0) then
    allocate(psb_d_cuda_csga_sparse_mat :: b, stat=info)
  end if

  ! Normal exit.
  if (info == psb_success_) return

  ! Failure: record the error and hand over to the error handler.
  info = psb_err_alloc_dealloc_
  call psb_errpush(info, name)
  call psb_error_handler(err_act)

  return

end subroutine psb_d_cuda_csga_mold
|
||||
@ -0,0 +1,139 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
!
! Build (or rebuild) the device copy of a CSGA matrix from the host arrays.
!
!   a    : matrix to upload; any device matrix already attached is freed
!   info : 0 on success, otherwise the first failing status
!   nzrm : optional, not used by this implementation
!
! The routine returns immediately with info = 0 when the host arrays are
! not allocated. The matrix is marked as synchronised only when the whole
! upload succeeded.
!
subroutine psb_d_cuda_csga_to_gpu(a,info,nzrm)

  use psb_base_mod
  use cusparse_mod
  use d_csga_mod
  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_to_gpu
  implicit none
  class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)             :: info
  integer(psb_ipk_), intent(in), optional    :: nzrm

  integer(psb_ipk_) :: m, n, nz
  integer(psb_ipk_) :: nzdi, i, j, nrz, nrb
  integer(psb_ipk_), allocatable :: irpdi(:), jadi(:)
  real(psb_dpk_), allocatable    :: valdi(:)

  info = 0

  if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))&
       & .or.(.not.allocated(a%irp))) return

  m   = a%get_nrows()
  n   = a%get_ncols()
  nz  = a%get_nzeros()
  if (c_associated(a%deviceMat%Mat)) then
    info = CSGADeviceFree(a%deviceMat)
  end if
  ! Do not let the reallocation hide a failure of the free above.
  ! The row-block routine writes at least two entries, so keep the
  ! table at least that long even for an empty matrix.
  if (info == 0) call psb_realloc(max(m,1_psb_ipk_)+1,a%rowBlocks,info)

  if (a%is_unit()) then
    !
    ! CUSPARSE has the habit of storing the diagonal and then ignoring,
    ! whereas we do not store it. Hence this adapter code.
    !
    nzdi = nz + m
    if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nzdi)
    if (info == 0) info = &
         & CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
    !!! We are explicitly adding the diagonal
    if ((info == 0) .and. a%is_triangle()) then
      if (a%is_upper()) then
        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
      else
        info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
      end if
    end if
    if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
    if (info == 0) then
      irpdi(1) = 1
      if (a%is_triangle().and.a%is_upper()) then
        ! Upper triangle: the unit diagonal entry goes first in each row.
        do i=1,m
          j        = irpdi(i)
          nrz      = a%irp(i+1)-a%irp(i)
          jadi(j)  = i
          valdi(j) = done
          jadi(j+1:j+nrz)  = a%ja(a%irp(i):a%irp(i+1)-1)
          valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
          irpdi(i+1) = j + nrz + 1
        end do
      else
        ! Otherwise the unit diagonal entry is appended to each row.
        do i=1,m
          j   = irpdi(i)
          nrz = a%irp(i+1)-a%irp(i)
          jadi(j+0:j+nrz-1)  = a%ja(a%irp(i):a%irp(i+1)-1)
          valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
          jadi(j+nrz)  = i
          valdi(j+nrz) = done
          irpdi(i+1)   = j + nrz + 1
        end do
      end if
      ! The row blocks must describe the matrix that is actually uploaded,
      ! i.e. the one with the extra diagonal entries.
      call CSGAComputeRowBlocks(m,irpdi,a%numBlocks,a%rowBlocks)
      nrb = a%numBlocks + 1
      ! NOTE(review): irpdi/jadi are uploaded as built (first pointer = 1),
      ! while the non-unit branch uploads irp/ja shifted by -1; confirm
      ! which index base the device code expects.
      a%rowBlocks(1:nrb) = a%rowBlocks(1:nrb) - 1
      info = CSGAHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi,&
           & a%numBlocks,a%rowBlocks)
      a%rowBlocks(1:nrb) = a%rowBlocks(1:nrb) + 1
    end if

  else

    if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nz)
    if (info == 0) then
      call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
      nrb = a%numBlocks + 1
      ! Upload shifted copies, then restore the host arrays.
      a%irp(:) = a%irp(:) - 1
      a%ja(:)  = a%ja(:)  - 1
      a%rowBlocks(1:nrb) = a%rowBlocks(1:nrb) - 1
      info = CSGAHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,&
           & a%numBlocks,a%rowBlocks)
      a%irp(:) = a%irp(:) + 1
      a%ja(:)  = a%ja(:)  + 1
      a%rowBlocks(1:nrb) = a%rowBlocks(1:nrb) + 1
    end if

  endif

  if (info == 0) then
    call a%set_sync()
  else
    write(0,*) 'Error in CSGA_TO_GPU ',info
  end if

end subroutine psb_d_cuda_csga_to_gpu
|
||||
@ -0,0 +1,119 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
|
||||
!
! y = alpha*op(A)*x + beta*y for a CSGA matrix and encapsulated vectors.
!
!   alpha, beta : scalars
!   a           : matrix; must be in the assembled state
!   x, y        : vectors; y is updated
!   info        : psb_success_ or an error code
!   trans       : optional, 'N' (default), 'T' or 'C'
!
! The transposed cases are delegated to the parent CSR implementation.
! The non-transposed case calls spmvCSGADevice when both vectors are of
! type psb_d_vect_cuda; otherwise it falls back to the parent CSR
! implementation on plain arrays.
!
subroutine psb_d_cuda_csga_vect_mv(alpha,a,x,beta,y,info,trans)

  use psb_base_mod
  use cusparse_mod
  use elldev_mod
  use psb_vectordev_mod
  use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_vect_mv
  use psb_d_cuda_vect_mod
  implicit none
  class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
  real(psb_dpk_), intent(in)                 :: alpha, beta
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_base_vect_type), intent(inout) :: y
  integer(psb_ipk_), intent(out)             :: info
  character, optional, intent(in)            :: trans

  real(psb_dpk_), allocatable :: rx(:), ry(:)
  logical           :: tra
  character         :: trans_
  Integer(Psb_ipk_) :: err_act
  character(len=20) :: name='d_cuda_csga_vect_mv'

  call psb_erractionsave(err_act)
  info = psb_success_
  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')

  if (tra) then
    ! Transposed product: run the parent CSR code and flag y as
    ! updated on the host side.
    if (.not.x%is_host()) call x%sync()
    if (beta /= dzero) then
      if (.not.y%is_host()) call y%sync()
    end if
    call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
    call y%set_host()
  else
    if (a%is_host()) call a%sync()
    select type (xx => x)
    type is (psb_d_vect_cuda)
      select type(yy => y)
      type is (psb_d_vect_cuda)
        if (xx%is_host()) call xx%sync()
        ! y only needs to be current when it contributes to the result.
        if (beta /= dzero) then
          if (yy%is_host()) call yy%sync()
        end if
        info = spmvCSGADevice(a%deviceMat,alpha,xx%deviceVect,&
             & beta,yy%deviceVect,a%rowBlocks)
        if (info /= 0) then
          ! Report the routine that actually failed.
          call psb_errpush(psb_err_from_subroutine_ai_,name,&
               & a_err='spmvCSGADevice',i_err=(/info,izero,izero,izero,izero/))
          info = psb_err_from_subroutine_ai_
          goto 9999
        end if
        call yy%set_dev()
      class default
        rx = xx%get_vect()
        ry = y%get_vect()
        call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
        call y%bld(ry)
      end select
    class default
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
      call y%bld(ry)
    end select
  end if
  if (info /= 0) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return
end subroutine psb_d_cuda_csga_vect_mv
|
||||
@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2013
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "core.h"
|
||||
#include "cuComplex.h"
|
||||
|
||||
/** \addtogroup csgaFun CSGA Format
|
||||
* @{
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
 * Matrix-vector product entry point for the CSGA format.
 *
 * Argument order follows the call made by d_spmvCSGADevice: handle,
 * beta and y, alpha and the CSR arrays (as, ja, irp), the matrix
 * dimensions m and n, the number of vectors ncol, the row-block table
 * (numBlocks, rowBlocks), x, the index base and the rb array.
 *
 * NOTE(review): the exact meaning of baseIndex and rb is defined by the
 * implementation, which is not part of this header - confirm there.
 */
int dCSGAMV(spgpuHandle_t handle,
            double beta, double* y,
            double alpha,
            const double* as, const int* ja, const int* irp,
            int m, int n, int ncol,
            int numBlocks, const int* rowBlocks,
            const double *x,
            int baseIndex,
            int *rb);
|
||||
|
||||
/** @}*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Loading…
Reference in New Issue