Adding support to HDIAG SpMM

cuda-multivect
gabrielequatrana 5 months ago
parent 0490dd77db
commit bdd04a6911

@ -275,19 +275,26 @@ psb_d_cuda_cp_hdiag_from_coo.o \
psb_d_cuda_mv_hdiag_from_coo.o \
psb_d_cuda_hdiag_to_gpu.o \
psb_d_cuda_hdiag_csmv.o \
psb_d_cuda_hdiag_csmm.o \
psb_d_cuda_hdiag_mold.o \
psb_d_cuda_hdiag_vect_mv.o \
psb_s_cuda_cp_hdiag_from_coo.o \
psb_s_cuda_mv_hdiag_from_coo.o \
psb_s_cuda_hdiag_to_gpu.o \
psb_s_cuda_hdiag_csmv.o \
psb_s_cuda_hdiag_csmm.o \
psb_s_cuda_hdiag_mold.o \
psb_s_cuda_hdiag_vect_mv.o \
psb_s_cuda_dnsg_mat_impl.o \
psb_d_cuda_dnsg_mat_impl.o \
psb_c_cuda_dnsg_mat_impl.o \
psb_z_cuda_dnsg_mat_impl.o
psb_z_cuda_dnsg_mat_impl.o \
psb_z_cuda_hdiag_csmv.o \
psb_z_cuda_hdiag_csmm.o \
psb_z_cuda_hdiag_vect_mv.o \
psb_c_cuda_hdiag_csmv.o \
psb_c_cuda_hdiag_csmm.o \
psb_c_cuda_hdiag_vect_mv.o
objs: $(OBJS)
lib: objs

@ -0,0 +1,123 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmm
implicit none
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
complex(psb_spk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_hdiag_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,size(x,1))
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,size(y,1))
if (info == 0) &
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,size(y,1))
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_hdiag_csmm

@ -115,3 +115,94 @@ subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
return
end subroutine psb_c_cuda_hdiag_vect_mv
subroutine psb_c_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_multivect_mv
use psb_c_cuda_multivect_mod
implicit none
class(psb_c_cuda_hdiag_mat_mod), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_multivect_type), intent(inout) :: x
class(psb_c_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_spk_), allocatable :: rx(:,:), ry(:,:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='c_cuda_hdiag_multivect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (a%is_dev()) call a%sync()
if (.not.x%is_host()) call x%sync()
if (beta /= dzero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_c_multivect_cuda)
select type(yy => y)
type is (psb_c_multivect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmmHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmmHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_cuda_hdiag_multivect_mv

@ -0,0 +1,123 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmm
implicit none
class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
real(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
real(psb_dpk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='d_cuda_hdiag_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,size(x,1))
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,size(y,1))
if (info == 0) &
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,size(y,1))
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_cuda_hdiag_csmm

@ -115,3 +115,94 @@ subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
return
end subroutine psb_d_cuda_hdiag_vect_mv
subroutine psb_d_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_multivect_mv
use psb_d_cuda_multivect_mod
implicit none
class(psb_d_cuda_hdiag_mat_mod), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_multivect_type), intent(inout) :: x
class(psb_d_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
real(psb_dpk_), allocatable :: rx(:,:), ry(:,:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='d_cuda_hdiag_multivect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (a%is_dev()) call a%sync()
if (.not.x%is_host()) call x%sync()
if (beta /= dzero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_d_multivect_cuda)
select type(yy => y)
type is (psb_d_multivect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmmHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmmHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_cuda_hdiag_multivect_mv

@ -0,0 +1,123 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmm
implicit none
class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
real(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
real(psb_spk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='s_cuda_hdiag_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,size(x,1))
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,size(y,1))
if (info == 0) &
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,size(y,1))
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_cuda_hdiag_csmm

@ -115,3 +115,94 @@ subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
return
end subroutine psb_s_cuda_hdiag_vect_mv
subroutine psb_s_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_multivect_mv
use psb_s_cuda_multivect_mod
implicit none
class(psb_s_cuda_hdiag_mat_mod), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_base_multivect_type), intent(inout) :: x
class(psb_s_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
real(psb_spk_), allocatable :: rx(:,:), ry(:,:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='s_cuda_hdiag_multivect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (a%is_dev()) call a%sync()
if (.not.x%is_host()) call x%sync()
if (beta /= dzero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_s_multivect_cuda)
select type(yy => y)
type is (psb_s_multivect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmmHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmmHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_cuda_hdiag_multivect_mv

@ -0,0 +1,123 @@
! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmm
implicit none
class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy
complex(psb_dpk_), allocatable :: acc(:)
type(c_ptr) :: gpX, gpY
logical :: tra
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='z_cuda_hdiag_csmm'
logical, parameter :: debug=.false.
info = psb_success_
call psb_erractionsave(err_act)
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = 36
call psb_errpush(info,name,i_err=(/3*ione,n,izero,izero,izero/))
goto 9999
end if
if (size(y,1)<m) then
info = 36
call psb_errpush(info,name,i_err=(/5*ione,m,izero,izero,izero/))
goto 9999
end if
if (tra) then
if (a%is_dev()) call a%sync()
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
else
!
! Just to test, move X/Y to/from the GPU.
!
nxy = min(size(x,2),size(y,2))
if (info == 0) &
& info = FallocMultiVecDevice(gpX,nxy,size(x,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpX,x,size(x,1))
if (info == 0) &
& info = FallocMultiVecDevice(gpY,nxy,size(y,1),spgpu_type_double)
if (info == 0) &
& info = writeMultiVecDevice(gpY,y,size(y,1))
if (info == 0) &
& info = spmmHdiagDevice(a%deviceMat,alpha,gpX,beta,gpY)
if (info == 0) &
& info = readMultiVecDevice(gpY,y,size(y,1))
if (info /= 0) goto 9999
call freeMultiVecDevice(gpX)
call freeMultiVecDevice(gpY)
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_cuda_hdiag_csmm

@ -115,3 +115,94 @@ subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans)
return
end subroutine psb_z_cuda_hdiag_vect_mv
subroutine psb_z_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
use psb_base_mod
use hdiagdev_mod
use psb_vectordev_mod
use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_multivect_mv
use psb_z_cuda_multivect_mod
implicit none
class(psb_z_cuda_hdiag_mat_mod), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta
class(psb_z_base_multivect_type), intent(inout) :: x
class(psb_z_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
complex(psb_dpk_), allocatable :: rx(:,:), ry(:,:)
logical :: tra
character :: trans_
Integer(Psb_ipk_) :: err_act
character(len=20) :: name='z_cuda_hdiag_multivect_mv'
call psb_erractionsave(err_act)
info = psb_success_
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C')
if (tra) then
if (a%is_dev()) call a%sync()
if (.not.x%is_host()) call x%sync()
if (beta /= dzero) then
if (.not.y%is_host()) call y%sync()
end if
call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans)
call y%set_host()
else
if (a%is_host()) call a%sync()
select type (xx => x)
type is (psb_z_multivect_cuda)
select type(yy => y)
type is (psb_z_multivect_cuda)
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
if (yy%is_host()) call yy%sync()
end if
info = spmmHdiagDevice(a%deviceMat,alpha,xx%deviceVect,&
& beta,yy%deviceVect)
if (info /= 0) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='spmmHDIAGDevice',i_err=(/info,izero,izero,izero,izero/))
info = psb_err_from_subroutine_ai_
goto 9999
end if
call yy%set_dev()
class default
if (a%is_dev()) call a%sync()
rx = xx%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
class default
if (a%is_dev()) call a%sync()
rx = x%get_vect()
ry = y%get_vect()
call a%spmm(alpha,rx,beta,ry,info)
call y%bld(ry)
end select
end if
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_cuda_hdiag_multivect_mv

@ -44,7 +44,8 @@ module psb_c_cuda_hdiag_mat_mod
procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt
! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof
procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv
! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm
procedure, pass(a) :: multivect_mv => psb_c_cuda_hdiag_multivect_mv
procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm
procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv
! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv
! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals
@ -77,6 +78,15 @@ module psb_c_cuda_hdiag_mat_mod
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_cuda_hdiag_vect_mv
subroutine psb_c_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_multivect_type, psb_ipk_
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_base_multivect_type), intent(inout) :: x
class(psb_c_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_cuda_hdiag_multivect_mv
end interface
!!$ interface
@ -172,17 +182,17 @@ module psb_c_cuda_hdiag_mat_mod
end subroutine psb_c_cuda_hdiag_csmv
end interface
!!$ interface
!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_
!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
!!$ complex(psb_spk_), intent(inout) :: y(:,:)
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character, optional, intent(in) :: trans
!!$ end subroutine psb_c_cuda_hdiag_csmm
!!$ end interface
!!$
interface
subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_
class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_cuda_hdiag_csmm
end interface
!!$ interface
!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side)
!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_

@ -44,7 +44,8 @@ module psb_d_cuda_hdiag_mat_mod
procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt
! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof
procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv
! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm
procedure, pass(a) :: multivect_mv => psb_d_cuda_hdiag_multivect_mv
procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm
procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv
! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv
! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals
@ -77,6 +78,15 @@ module psb_d_cuda_hdiag_mat_mod
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_cuda_hdiag_vect_mv
subroutine psb_d_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_multivect_type, psb_ipk_
class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_multivect_type), intent(inout) :: x
class(psb_d_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_cuda_hdiag_multivect_mv
end interface
!!$ interface
@ -172,17 +182,17 @@ module psb_d_cuda_hdiag_mat_mod
end subroutine psb_d_cuda_hdiag_csmv
end interface
!!$ interface
!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
!!$ real(psb_dpk_), intent(inout) :: y(:,:)
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character, optional, intent(in) :: trans
!!$ end subroutine psb_d_cuda_hdiag_csmm
!!$ end interface
!!$
interface
subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
real(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_cuda_hdiag_csmm
end interface
!!$ interface
!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side)
!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_

@ -44,7 +44,8 @@ module psb_s_cuda_hdiag_mat_mod
procedure, nopass :: get_fmt => s_cuda_hdiag_get_fmt
! procedure, pass(a) :: sizeof => s_cuda_hdiag_sizeof
procedure, pass(a) :: vect_mv => psb_s_cuda_hdiag_vect_mv
! procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm
procedure, pass(a) :: multivect_mv => psb_s_cuda_hdiag_multivect_mv
procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm
procedure, pass(a) :: csmv => psb_s_cuda_hdiag_csmv
! procedure, pass(a) :: in_vect_sv => psb_s_cuda_hdiag_inner_vect_sv
! procedure, pass(a) :: scals => psb_s_cuda_hdiag_scals
@ -77,6 +78,15 @@ module psb_s_cuda_hdiag_mat_mod
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_cuda_hdiag_vect_mv
subroutine psb_s_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_multivect_type, psb_ipk_
class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta
class(psb_s_base_multivect_type), intent(inout) :: x
class(psb_s_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_cuda_hdiag_multivect_mv
end interface
!!$ interface
@ -172,17 +182,17 @@ module psb_s_cuda_hdiag_mat_mod
end subroutine psb_s_cuda_hdiag_csmv
end interface
!!$ interface
!!$ subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_
!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
!!$ real(psb_spk_), intent(inout) :: y(:,:)
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character, optional, intent(in) :: trans
!!$ end subroutine psb_s_cuda_hdiag_csmm
!!$ end interface
!!$
interface
subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_
class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
real(psb_spk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_cuda_hdiag_csmm
end interface
!!$ interface
!!$ subroutine psb_s_cuda_hdiag_scal(d,a,info, side)
!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_

@ -44,7 +44,8 @@ module psb_z_cuda_hdiag_mat_mod
procedure, nopass :: get_fmt => z_cuda_hdiag_get_fmt
! procedure, pass(a) :: sizeof => z_cuda_hdiag_sizeof
procedure, pass(a) :: vect_mv => psb_z_cuda_hdiag_vect_mv
! procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm
procedure, pass(a) :: multivect_mv => psb_z_cuda_hdiag_multivect_mv
procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm
procedure, pass(a) :: csmv => psb_z_cuda_hdiag_csmv
! procedure, pass(a) :: in_vect_sv => psb_z_cuda_hdiag_inner_vect_sv
! procedure, pass(a) :: scals => psb_z_cuda_hdiag_scals
@ -77,6 +78,15 @@ module psb_z_cuda_hdiag_mat_mod
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_cuda_hdiag_vect_mv
subroutine psb_z_cuda_hdiag_multivect_mv(alpha,a,x,beta,y,info,trans)
import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_multivect_type, psb_ipk_
class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta
class(psb_z_base_multivect_type), intent(inout) :: x
class(psb_z_base_multivect_type), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_cuda_hdiag_multivect_mv
end interface
!!$ interface
@ -172,17 +182,17 @@ module psb_z_cuda_hdiag_mat_mod
end subroutine psb_z_cuda_hdiag_csmv
end interface
!!$ interface
!!$ subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
!!$ complex(psb_dpk_), intent(inout) :: y(:,:)
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character, optional, intent(in) :: trans
!!$ end subroutine psb_z_cuda_hdiag_csmm
!!$ end interface
!!$
interface
subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans)
import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_
class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
complex(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_cuda_hdiag_csmm
end interface
!!$ interface
!!$ subroutine psb_z_cuda_hdiag_scal(d,a,info, side)
!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_

@ -17,8 +17,10 @@ OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \
dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o ell_dcsput.o \
ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \
ell_cspmm.o ell_dspmm.o ell_sspmm.o ell_zspmm.o \
hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \
hell_sspmv.o hell_zspmv.o hell_cspmm.o hell_dspmm.o hell_sspmm.o hell_zspmm.o \
hdia_cspmv.o hdia_cspmm.o hdia_dspmv.o hdia_dspmm.o \
hdia_sspmv.o hdia_sspmm.o hdia_zspmv.o hdia_zspmm.o \
hell_cspmv.o hell_dspmv.o hell_sspmv.o hell_zspmv.o \
hell_cspmm.o hell_dspmm.o hell_sspmm.o hell_zspmm.o \
igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \
saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\
zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \

@ -25,7 +25,6 @@ CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn)
const VALUE_TYPE *x, int xPitch,
VALUE_TYPE beta, int baseIndex)
{
// TODO
VALUE_TYPE *pz,*px,*py;
VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)();
VALUE_TYPE yVal;

@ -0,0 +1,35 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include "cuComplex.h"
extern "C"
{
#include "core.h"
#include "hdia.h"
int getGPUSharedMemPerBlock();
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define VALUE_TYPE cuFloatComplex
#define TYPE_SYMBOL C
#define TEX_FETCH_TYPE cuFloatComplex
#include "hdia_spmm_base.cuh"

@ -0,0 +1,36 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2014
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
extern "C"
{
#include "core.h"
#include "hdia.h"
int getGPUSharedMemPerBlock();
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
//#define ENABLE_CACHE
#define VALUE_TYPE double
#define TYPE_SYMBOL D
//#define TEX_FETCH_TYPE int2
#include "hdia_spmm_base.cuh"

@ -0,0 +1,100 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define PRE_CONCAT(A, B) A ## B
#define CONCAT(A, B) PRE_CONCAT(A, B)
#undef GEN_SPGPU_HDIA_NAME
#undef X_TEX
#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX)
__device__ __host__ static float zero_float() { return 0.0f; }
__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); }
__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; }
__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); }
__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); }
__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); }
__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); }
__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); }
__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); }
__device__ static float readValue_float(float fetch) { return fetch; }
__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; }
// host or c.c >= 1.3
#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__)
__device__ __host__ static double zero_double() { return 0.0; }
__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); }
__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; }
__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); }
__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); }
__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); }
__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); }
__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); }
__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); }
__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); }
__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch)
{
cuDoubleComplex c;
c.x = __hiloint2double (fetch.y, fetch.x);
c.y = __hiloint2double (fetch.w, fetch.z);
return c;
}
#endif
#if 0
// Texture cache management
texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX;
#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x)
#define unbind_tex_x(x) cudaUnbindTexture(X_TEX)
__device__ static VALUE_TYPE
fetchTex (int pointer)
{
TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer);
return CONCAT(readValue_,VALUE_TYPE) (fetch);
}
#endif
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla)
#include "hdia_spmv_base_template.cuh"
#if 0
#undef GEN_SPGPU_HDIA_NAME
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch)
#undef USE_PREFETCHING
#define USE_PREFETCHING
#include "hdia_spmv_base_template.cuh"
#define ENABLE_CACHE
#undef GEN_SPGPU_HDIA_NAME
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch)
#include "hdia_spmv_base_template.cuh"
#undef GEN_SPGPU_HDIA_NAME
#undef USE_PREFETCHING
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache)
#include "hdia_spmv_base_template.cuh"
#endif
#undef GEN_SPGPU_HDIA_NAME
#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv)

@ -0,0 +1,18 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#define THREAD_BLOCK 128

@ -0,0 +1,35 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
extern "C"
{
#include "core.h"
#include "hdia.h"
int getGPUSharedMemPerBlock();
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define VALUE_TYPE float
#define TYPE_SYMBOL S
#define TEX_FETCH_TYPE float
#include "hdia_spmm_base.cuh"

@ -0,0 +1,36 @@
/*
* spGPU - Sparse matrices on GPU library.
*
* Copyright (C) 2010 - 2015
* Davide Barbieri - University of Rome Tor Vergata
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "cudadebug.h"
#include "cudalang.h"
#include "cuComplex.h"
extern "C"
{
#include "core.h"
#include "hdia.h"
int getGPUSharedMemPerBlock();
int getGPUMultiProcessors();
int getGPUMaxThreadsPerMP();
}
#include "debug.h"
#define VALUE_TYPE cuDoubleComplex
#define TYPE_SYMBOL Z
#define TEX_FETCH_TYPE int4
#include "hdia_spmm_base.cuh"
Loading…
Cancel
Save