Adding support to HDIAG SpMM
parent
0490dd77db
commit
bdd04a6911
@ -0,0 +1,123 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
|
||||
|
||||
use psb_base_mod
|
||||
use hdiagdev_mod
|
||||
use psb_vectordev_mod
|
||||
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmm
|
||||
implicit none
|
||||
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
|
||||
complex(psb_spk_), intent(inout) :: y(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||
complex(psb_spk_), allocatable :: acc(:)
|
||||
type(c_ptr) :: gpX, gpY
|
||||
logical :: tra
|
||||
Integer(Psb_ipk_) :: err_act
|
||||
character(len=20) :: name='c_cuda_hdiag_csmm'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||
|
||||
if (tra) then
|
||||
m = a%get_ncols()
|
||||
n = a%get_nrows()
|
||||
else
|
||||
n = a%get_ncols()
|
||||
m = a%get_nrows()
|
||||
end if
|
||||
|
||||
if (size(x,1)<n) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (size(y,1)<m) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (tra) then
|
||||
if (a%is_dev()) call a%sync()
|
||||
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||
else
|
||||
!
|
||||
! Just to test, move X/Y to/from the GPU.
|
||||
!
|
||||
nxy = min(size(x,2),size(y,2))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpX,x,size(x,1))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpY,y,size(y,1))
|
||||
|
||||
if (info == 0) &
|
||||
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||
if (info == 0) &
|
||||
& info = readMultiVecDevice(gpY,y,size(y,1))
|
||||
if (info /= 0) goto 9999
|
||||
call freeMultiVecDevice(gpX)
|
||||
call freeMultiVecDevice(gpY)
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_c_cuda_hdiag_csmm
|
@ -0,0 +1,123 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
|
||||
|
||||
use psb_base_mod
|
||||
use hdiagdev_mod
|
||||
use psb_vectordev_mod
|
||||
use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmm
|
||||
implicit none
|
||||
class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||
real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
|
||||
real(psb_dpk_), intent(inout) :: y(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||
real(psb_dpk_), allocatable :: acc(:)
|
||||
type(c_ptr) :: gpX, gpY
|
||||
logical :: tra
|
||||
Integer(Psb_ipk_) :: err_act
|
||||
character(len=20) :: name='d_cuda_hdiag_csmm'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||
|
||||
if (tra) then
|
||||
m = a%get_ncols()
|
||||
n = a%get_nrows()
|
||||
else
|
||||
n = a%get_ncols()
|
||||
m = a%get_nrows()
|
||||
end if
|
||||
|
||||
if (size(x,1)<n) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (size(y,1)<m) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (tra) then
|
||||
if (a%is_dev()) call a%sync()
|
||||
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||
else
|
||||
!
|
||||
! Just to test, move X/Y to/from the GPU.
|
||||
!
|
||||
nxy = min(size(x,2),size(y,2))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpX,x,size(x,1))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpY,y,size(y,1))
|
||||
|
||||
if (info == 0) &
|
||||
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||
if (info == 0) &
|
||||
& info = readMultiVecDevice(gpY,y,size(y,1))
|
||||
if (info /= 0) goto 9999
|
||||
call freeMultiVecDevice(gpX)
|
||||
call freeMultiVecDevice(gpY)
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_d_cuda_hdiag_csmm
|
@ -0,0 +1,123 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
|
||||
|
||||
use psb_base_mod
|
||||
use hdiagdev_mod
|
||||
use psb_vectordev_mod
|
||||
use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmm
|
||||
implicit none
|
||||
class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||
real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
|
||||
real(psb_spk_), intent(inout) :: y(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||
real(psb_spk_), allocatable :: acc(:)
|
||||
type(c_ptr) :: gpX, gpY
|
||||
logical :: tra
|
||||
Integer(Psb_ipk_) :: err_act
|
||||
character(len=20) :: name='s_cuda_hdiag_csmm'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||
|
||||
if (tra) then
|
||||
m = a%get_ncols()
|
||||
n = a%get_nrows()
|
||||
else
|
||||
n = a%get_ncols()
|
||||
m = a%get_nrows()
|
||||
end if
|
||||
|
||||
if (size(x,1)<n) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (size(y,1)<m) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (tra) then
|
||||
if (a%is_dev()) call a%sync()
|
||||
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||
else
|
||||
!
|
||||
! Just to test, move X/Y to/from the GPU.
|
||||
!
|
||||
nxy = min(size(x,2),size(y,2))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpX,x,size(x,1))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpY,y,size(y,1))
|
||||
|
||||
if (info == 0) &
|
||||
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||
if (info == 0) &
|
||||
& info = readMultiVecDevice(gpY,y,size(y,1))
|
||||
if (info /= 0) goto 9999
|
||||
call freeMultiVecDevice(gpX)
|
||||
call freeMultiVecDevice(gpY)
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_s_cuda_hdiag_csmm
|
@ -0,0 +1,123 @@
|
||||
! Parallel Sparse BLAS GPU plugin
|
||||
! (C) Copyright 2013
|
||||
!
|
||||
! Salvatore Filippone
|
||||
! Alessandro Fanfarillo
|
||||
!
|
||||
! Redistribution and use in source and binary forms, with or without
|
||||
! modification, are permitted provided that the following conditions
|
||||
! are met:
|
||||
! 1. Redistributions of source code must retain the above copyright
|
||||
! notice, this list of conditions and the following disclaimer.
|
||||
! 2. Redistributions in binary form must reproduce the above copyright
|
||||
! notice, this list of conditions, and the following disclaimer in the
|
||||
! documentation and/or other materials provided with the distribution.
|
||||
! 3. The name of the PSBLAS group or the names of its contributors may
|
||||
! not be used to endorse or promote products derived from this
|
||||
! software without specific written permission.
|
||||
!
|
||||
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
||||
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
! POSSIBILITY OF SUCH DAMAGE.
|
||||
!
|
||||
|
||||
subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
|
||||
|
||||
use psb_base_mod
|
||||
use hdiagdev_mod
|
||||
use psb_vectordev_mod
|
||||
use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmm
|
||||
implicit none
|
||||
class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
|
||||
complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
|
||||
complex(psb_dpk_), intent(inout) :: y(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
|
||||
complex(psb_dpk_), allocatable :: acc(:)
|
||||
type(c_ptr) :: gpX, gpY
|
||||
logical :: tra
|
||||
Integer(Psb_ipk_) :: err_act
|
||||
character(len=20) :: name='z_cuda_hdiag_csmm'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info,name)
|
||||
goto 9999
|
||||
endif
|
||||
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
|
||||
|
||||
if (tra) then
|
||||
m = a%get_ncols()
|
||||
n = a%get_nrows()
|
||||
else
|
||||
n = a%get_ncols()
|
||||
m = a%get_nrows()
|
||||
end if
|
||||
|
||||
if (size(x,1)<n) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (size(y,1)<m) then
|
||||
info = 36
|
||||
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
|
||||
goto 9999
|
||||
end if
|
||||
|
||||
if (tra) then
|
||||
if (a%is_dev()) call a%sync()
|
||||
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
|
||||
else
|
||||
!
|
||||
! Just to test, move X/Y to/from the GPU.
|
||||
!
|
||||
nxy = min(size(x,2),size(y,2))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpX,x,size(x,1))
|
||||
if (info == 0) &
|
||||
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
|
||||
if (info == 0) &
|
||||
& info = writeMultiVecDevice(gpY,y,size(y,1))
|
||||
|
||||
if (info == 0) &
|
||||
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
|
||||
if (info == 0) &
|
||||
& info = readMultiVecDevice(gpY,y,size(y,1))
|
||||
if (info /= 0) goto 9999
|
||||
call freeMultiVecDevice(gpX)
|
||||
call freeMultiVecDevice(gpY)
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_z_cuda_hdiag_csmm
|
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2015
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "cudadebug.h"
|
||||
#include "cudalang.h"
|
||||
#include "cuComplex.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "core.h"
|
||||
#include "hdia.h"
|
||||
int getGPUSharedMemPerBlock();
|
||||
int getGPUMultiProcessors();
|
||||
int getGPUMaxThreadsPerMP();
|
||||
}
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
#define VALUE_TYPE cuFloatComplex
|
||||
#define TYPE_SYMBOL C
|
||||
#define TEX_FETCH_TYPE cuFloatComplex
|
||||
#include "hdia_spmm_base.cuh"
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2014
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "cudadebug.h"
|
||||
#include "cudalang.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "core.h"
|
||||
#include "hdia.h"
|
||||
int getGPUSharedMemPerBlock();
|
||||
int getGPUMultiProcessors();
|
||||
int getGPUMaxThreadsPerMP();
|
||||
}
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
//#define ENABLE_CACHE
|
||||
#define VALUE_TYPE double
|
||||
#define TYPE_SYMBOL D
|
||||
//#define TEX_FETCH_TYPE int2
|
||||
#include "hdia_spmm_base.cuh"
|
||||
|
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2015
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
|
||||
#define PRE_CONCAT(A, B) A ## B
|
||||
#define CONCAT(A, B) PRE_CONCAT(A, B)
|
||||
|
||||
#undef GEN_SPGPU_HDIA_NAME
|
||||
#undef X_TEX
|
||||
#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX)
|
||||
|
||||
__device__ __host__ static float zero_float() { return 0.0f; }
|
||||
__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
|
||||
__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; }
|
||||
|
||||
__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); }
|
||||
__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); }
|
||||
__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); }
|
||||
|
||||
__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); }
|
||||
__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); }
|
||||
__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); }
|
||||
|
||||
__device__ static float readValue_float(float fetch) { return fetch; }
|
||||
__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; }
|
||||
|
||||
// host or c.c >= 1.3
|
||||
#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
|
||||
__device__ __host__ static double zero_double() { return 0.0; }
|
||||
__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
|
||||
__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; }
|
||||
|
||||
__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); }
|
||||
__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); }
|
||||
__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); }
|
||||
|
||||
__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); }
|
||||
__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); }
|
||||
__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); }
|
||||
|
||||
__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); }
|
||||
__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch)
|
||||
{
|
||||
cuDoubleComplex c;
|
||||
c.x = __hiloint2double (fetch.y, fetch.x);
|
||||
c.y = __hiloint2double (fetch.w, fetch.z);
|
||||
return c;
|
||||
}
|
||||
#endif
|
||||
#if 0
|
||||
// Texture cache management
|
||||
texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX;
|
||||
|
||||
#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x)
|
||||
#define unbind_tex_x(x) cudaUnbindTexture(X_TEX)
|
||||
|
||||
__device__ static VALUE_TYPE
|
||||
fetchTex (int pointer)
|
||||
{
|
||||
TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer);
|
||||
return CONCAT(readValue_,VALUE_TYPE) (fetch);
|
||||
}
|
||||
#endif
|
||||
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
|
||||
#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
|
||||
#include "hdia_spmv_base_template.cuh"
|
||||
#if 0
|
||||
#undef GEN_SPGPU_HDIA_NAME
|
||||
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
|
||||
#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
|
||||
#undef USE_PREFETCHING
|
||||
#define USE_PREFETCHING
|
||||
#include "hdia_spmv_base_template.cuh"
|
||||
#define ENABLE_CACHE
|
||||
#undef GEN_SPGPU_HDIA_NAME
|
||||
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
|
||||
#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
|
||||
#include "hdia_spmv_base_template.cuh"
|
||||
#undef GEN_SPGPU_HDIA_NAME
|
||||
#undef USE_PREFETCHING
|
||||
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
|
||||
#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
|
||||
#include "hdia_spmv_base_template.cuh"
|
||||
#endif
|
||||
#undef GEN_SPGPU_HDIA_NAME
|
||||
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv)
|
||||
|
@ -0,0 +1,18 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2015
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#define THREAD_BLOCK 128
|
||||
|
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2015
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "cudadebug.h"
|
||||
#include "cudalang.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "core.h"
|
||||
#include "hdia.h"
|
||||
int getGPUSharedMemPerBlock();
|
||||
int getGPUMultiProcessors();
|
||||
int getGPUMaxThreadsPerMP();
|
||||
}
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
#define VALUE_TYPE float
|
||||
#define TYPE_SYMBOL S
|
||||
#define TEX_FETCH_TYPE float
|
||||
#include "hdia_spmm_base.cuh"
|
||||
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* spGPU - Sparse matrices on GPU library.
|
||||
*
|
||||
* Copyright (C) 2010 - 2015
|
||||
* Davide Barbieri - University of Rome Tor Vergata
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* version 3 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#include "cudadebug.h"
|
||||
#include "cudalang.h"
|
||||
#include "cuComplex.h"
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "core.h"
|
||||
#include "hdia.h"
|
||||
int getGPUSharedMemPerBlock();
|
||||
int getGPUMultiProcessors();
|
||||
int getGPUMaxThreadsPerMP();
|
||||
}
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
#define VALUE_TYPE cuDoubleComplex
|
||||
#define TYPE_SYMBOL Z
|
||||
#define TEX_FETCH_TYPE int4
|
||||
#include "hdia_spmm_base.cuh"
|
||||
|
Loading…
Reference in New Issue