Missing impl files

repack-csga
Salvatore Filippone 2 years ago
parent 173ffec2d3
commit fe87ca52e3

@ -0,0 +1,171 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "dcsga.h"
int d_CSGADeviceFree(d_Cmat *Matrix)
{
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) d_CSRGDeviceFree(cMat);
return(CUSPARSE_STATUS_SUCCESS);
}
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz)
{
int rc=0;
d_CSRGDeviceMat *cMat;
if ((rc=d_CSRGDeviceAlloc(Matrix,nr,nc,nz))!=0)
return(rc);
cMat = Matrix->mat;
if (nr <= 0) nr = 1;
if ((rc= allocRemoteBuffer(((void **) &(cMat->rowBlocks)), ((nr+1)*sizeof(int)))) != 0)
return(rc);
return(CUSPARSE_STATUS_SUCCESS);
}
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks){
rowBlocks[0] = 1;
int sum = 0, last_i= 1, ctr=1;
for(int i = 1; i < totalRows; i++){
sum += irp[i]-irp[i-1];
if(sum == MAX_NNZ_PER_WG){
last_i = i+1;
rowBlocks[ctr++] = i+1;
sum = 0;
}
else if( sum > MAX_NNZ_PER_WG){
if(i - last_i > 1){
rowBlocks[ctr++] = i-1 +1;
i--;
}
else if(i - last_i == 1){
rowBlocks[ctr++] = i +1;
}
last_i = i+1;
sum = 0;
}
}
//printf("%d %d\n",ctr,totalRows);
*numBlocks = ctr;
rowBlocks[ctr++] = totalRows;
return ;
}
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks)
{
int rc=0;
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
if ((rc=d_CSRGHost2Device(Matrix,nr,nc,nz,irp,ja,val))
!= SPGPU_SUCCESS)
return(rc);
cMat->numBlocks = numBlocks;
// fprintf(stderr," CSGAH2D: %d (%d:%d) %p\n",numBlocks,
// rowBlocks[0],rowBlocks[1],cMat->rowBlocks);
if ((rc=writeRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
(numBlocks+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
//fprintf(stderr," CSGAH2D ok\n");
} else {
return(-1);
}
return(CUSPARSE_STATUS_SUCCESS);
}
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks)
{
int rc=0;
d_CSRGDeviceMat *cMat= Matrix->mat;
if (cMat!=NULL) {
if ((rc=d_CSRGDevice2Host(Matrix,nr,nc,nz,irp,ja,val))
!= SPGPU_SUCCESS)
return(rc);
*numBlocks = cMat->numBlocks ;
if ((rc=readRemoteBuffer((void *) rowBlocks,(void *) cMat->rowBlocks,
((*numBlocks)+1)*sizeof(int)))
!= SPGPU_SUCCESS)
return(rc);
}
return(CUSPARSE_STATUS_SUCCESS);
}
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
double beta, void* deviceY, int *rb)
{
d_CSRGDeviceMat *devMat = (d_CSRGDeviceMat *) Matrix->mat;
struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX;
struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY;
spgpuHandle_t handle=psb_cudaGetHandle();
int indexBase=1;
#if 0
fprintf(stderr,"devMat %p m %d n %d nB %d rB %p x %p y %p \n",devMat,
devMat->m,devMat->n,devMat->numBlocks,devMat->rowBlocks,x->v_,y->v_);
fprintf(stderr,"x_count %d y_count %d xsize %d ysize %d \n",x->count_,y->count_,
x->size_,y->size_);
#endif
#if 0&&defined(VERBOSE)
__assert(x->count_ == y->count_, "ERROR: x and y don't share the same number of vectors");
__assert(x->size_ >= devMat->n, "ERROR: x vector's size is not >= to matrix size (columns)");
__assert(y->size_ >= devMat->m, "ERROR: y vector's size is not >= to matrix size (rows)");
#endif
//fprintf(stderr,"Calling dCSGAMV \n");
dCSGAMV(handle, beta,(double *)y->v_, alpha,
(double *)devMat->val, devMat->ja, devMat->irp,
devMat->m,devMat->n,y->count_, devMat->numBlocks, devMat->rowBlocks,
(double *)x->v_, 1, rb);
}

@ -0,0 +1,65 @@
/* Parallel Sparse BLAS GPU plugin */
/* (C) Copyright 2013 */
/* Salvatore Filippone */
/* Alessandro Fanfarillo */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions, and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* 3. The name of the PSBLAS group or the names of its contributors may */
/* not be used to endorse or promote products derived from this */
/* software without specific written permission. */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
/* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
/* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */
/* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */
/* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */
/* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */
/* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
#ifndef DCSGA_
#define DCSGA_
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cusparse_v2.h>
#include "cintrf.h"
#include "dcusparse.h"
#include "fcusparse.h"
#include "fcusparse_dat.h"
#define MAX_NNZ_PER_WG 4096
int d_CSGADeviceFree(d_Cmat *Matrix);
int d_CSGADeviceAlloc(d_Cmat *Matrix,int nr, int nc, int nz);
void d_CSGAComputeRowBlocks(int totalRows, int* irp, int* numBlocks, int *rowBlocks);
int d_CSGAHost2Device(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int numBlocks, int *rowBlocks);
int d_CSGADevice2Host(d_Cmat *Matrix,int nr, int nc, int nz,
int *irp, int *ja, double *val, int *numBlocks, int *rowBlocks);
int d_spmvCSGADevice(d_Cmat *Matrix, double alpha, void* deviceX,
double beta, void* deviceY, int *rb);
int dCSGAMV(spgpuHandle_t handle, double beta, double* y, double alpha,
const double* as, const int* ja, const int* irp,
int m, int n, int ncol, int numBlocks,
const int* rowBlocks, const double *x,
int baseIndex, int *rb);
#endif

@ -0,0 +1,68 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cuda_csga_from_gpu(a,info)
use psb_base_mod
use elldev_mod
use psb_vectordev_mod
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_from_gpu
implicit none
class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: m, n, nz
info = 0
if (.not.(c_associated(a%deviceMat%mat))) then
call a%free()
return
end if
info = CSRGDeviceGetParms(a%deviceMat,m,n,nz)
if (info /= psb_success_) return
if (info == 0) call psb_realloc(m+1,a%irp,info)
if (info == 0) call psb_realloc(nz,a%ja,info)
if (info == 0) call psb_realloc(nz,a%val,info)
if (info == 0) call psb_realloc(m+1,a%rowBlocks,info)
if (info == 0) info = &
& CSGADevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,a%numBlocks,a%rowBlocks)
#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030)
a%irp(:) = a%irp(:)+1
a%ja(:) = a%ja(:)+1
#endif
call a%set_sync()
end subroutine psb_d_cuda_csga_from_gpu

@ -0,0 +1,65 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cuda_csga_mold(a,b,info)
use psb_base_mod
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_mold
implicit none
class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='csga_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_d_cuda_csga_sparse_mat :: b, stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_cuda_csga_mold

@ -0,0 +1,139 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cuda_csga_to_gpu(a,info,nzrm)
use psb_base_mod
use cusparse_mod
use d_csga_mod
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_to_gpu
implicit none
class(psb_d_cuda_csga_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_), intent(in), optional :: nzrm
integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz
integer(psb_ipk_) :: nzdi,i,j,k,nrz
integer(psb_ipk_), allocatable :: irpdi(:),jadi(:),rbi(:)
real(psb_dpk_), allocatable :: valdi(:)
info = 0
if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return
m = a%get_nrows()
n = a%get_ncols()
nz = a%get_nzeros()
if (c_associated(a%deviceMat%Mat)) then
info = CSGADeviceFree(a%deviceMat)
end if
call psb_realloc(m+1,a%rowBlocks,info)
if (a%is_unit()) then
!
! CUSPARSE has the habit of storing the diagonal and then ignoring,
! whereas we do not store it. Hence this adapter code.
!
nzdi = nz + m
if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nzdi)
if (info == 0) then
if (a%is_unit()) then
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit)
else
info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit)
end if
end if
!!! We are explicitly adding the diagonal
if ((info == 0) .and. a%is_triangle()) then
if ((info == 0).and.a%is_upper()) then
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper)
else
info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower)
end if
end if
if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info)
if (info == 0) then
irpdi(1) = 1
if (a%is_triangle().and.a%is_upper()) then
do i=1,m
j = irpdi(i)
jadi(j) = i
valdi(j) = done
nrz = a%irp(i+1)-a%irp(i)
jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1)
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
else
do i=1,m
j = irpdi(i)
nrz = a%irp(i+1)-a%irp(i)
jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)
valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1)
jadi(j+nrz) = i
valdi(j+nrz) = done
irpdi(i+1) = j + nrz + 1
! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz)
end do
end if
end if
call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
a%irp(:) = a%irp(:) -1
a%ja(:) = a%ja(:) -1
a%rowBlocks(:) = a%rowBlocks(:) -1
if (info == 0) info = CSGAHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi,&
& a%numBlocks,a%rowBlocks)
a%irp(:) = a%irp(:) +1
a%ja(:) = a%ja(:) +1
a%rowBlocks(:) = a%rowBlocks(:) +1
else
if (info == 0) info = CSGADeviceAlloc(a%deviceMat,m,n,nz)
call CSGAComputeRowBlocks(m,a%irp,a%numBlocks,a%rowBlocks)
!!$ write(0,*) 'to_gpu: ',a%numBlocks,':',&
!!$ & a%rowBlocks(1:2),a%rowBlocks(a%numBlocks:a%numBlocks+1)
a%irp(:) = a%irp(:) -1
a%ja(:) = a%ja(:) -1
a%rowBlocks(:) = a%rowBlocks(:) -1
if (info == 0) info = CSGAHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val,&
& a%numBlocks,a%rowBlocks)
a%irp(:) = a%irp(:) +1
a%ja(:) = a%ja(:) +1
a%rowBlocks(:) = a%rowBlocks(:) +1
endif
call a%set_sync()
if (info /= 0) then
write(0,*) 'Error in CSGA_TO_GPU ',info
end if
end subroutine psb_d_cuda_csga_to_gpu

@ -0,0 +1,119 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cuda_csga_vect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use cusparse_mod
use elldev_mod
use psb_vectordev_mod
use psb_d_cuda_csga_mat_mod, psb_protect_name => psb_d_cuda_csga_vect_mv
use psb_d_cuda_vect_mod
implicit none
class(psb_d_cuda_csga_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
real(psb_dpk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='d_cuda_csga_vect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (.not.x%is_host()) call x%sync()
if (beta /= dzero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_d_vect_cuda)
select type(yy => y)
type is (psb_d_vect_cuda)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmvCSGADevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect,a%rowBlocks)
!!$ info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,&
!!$ & beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
rx = xx%get_vect()
ry = y%get_vect()
call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
rx = x%get_vect()
ry = y%get_vect()
call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_cuda_csga_vect_mv

@ -0,0 +1,49 @@
#pragma once
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2013
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "core.h"
#include "cuComplex.h"
/** \addtogroup diaFun DIA/HDIA Format
* @{
*/
#ifdef __cplusplus
extern "C" {
#endif
int dCSGAMV(spgpuHandle_t handle,
double beta,
double* y,
double alpha,
const double* as,
const int* ja,
const int* irp,
int m,
int n,
int ncol,
int numBlocks,
const int* rowBlocks,
const double *x,
int baseIndex,
int *rb);
/** @}*/
#ifdef __cplusplus
}
#endif
Loading…
Cancel
Save