You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
351 lines
10 KiB
Fortran
351 lines
10 KiB
Fortran
! Parallel Sparse BLAS GPU plugin
|
|
! (C) Copyright 2013
|
|
!
|
|
! Salvatore Filippone
|
|
! Alessandro Fanfarillo
|
|
!
|
|
! Redistribution and use in source and binary forms, with or without
|
|
! modification, are permitted provided that the following conditions
|
|
! are met:
|
|
! 1. Redistributions of source code must retain the above copyright
|
|
! notice, this list of conditions and the following disclaimer.
|
|
! 2. Redistributions in binary form must reproduce the above copyright
|
|
! notice, this list of conditions, and the following disclaimer in the
|
|
! documentation and/or other materials provided with the distribution.
|
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
|
! not be used to endorse or promote products derived from this
|
|
! software without specific written permission.
|
|
!
|
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
! POSSIBILITY OF SUCH DAMAGE.
|
|
!
|
|
|
|
|
|
module psb_cuda_env_mod
|
|
use psb_const_mod
|
|
use iso_c_binding
|
|
use base_cusparse_mod
|
|
! interface psb_cuda_init
|
|
! module procedure psb_cuda_init
|
|
! end interface
|
|
use core_mod
|
|
|
|
interface
  ! Explicit interface to the C routine `psb_cudaGetHandle`.
  ! Takes no arguments and hands back an opaque C pointer
  ! (presumably the handle set up by psb_cudaCreateHandle -- confirm
  ! against the C sources).
  function psb_cudaGetHandle() bind(c, name='psb_cudaGetHandle') &
       & result(res)
    use iso_c_binding, only: c_ptr
    type(c_ptr) :: res
  end function psb_cudaGetHandle
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `psb_cudaGetStream`.
  ! Takes no arguments and hands back an opaque C pointer
  ! (presumably a stream object -- confirm against the C sources).
  function psb_cudaGetStream() bind(c, name='psb_cudaGetStream') &
       & result(res)
    use iso_c_binding, only: c_ptr
    type(c_ptr) :: res
  end function psb_cudaGetStream
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `gpuInit`.
  !   dev : device index, passed by value
  !   res : C int status; psb_cuda_init treats 0 as success
  function psb_C_gpu_init(dev) bind(c, name='gpuInit') result(res)
    use iso_c_binding, only: c_int
    integer(c_int), value :: dev
    integer(c_int)        :: res
  end function psb_C_gpu_init
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `getDeviceCount`.
  ! Takes no arguments and returns a C int; wrapped for Fortran callers
  ! by psb_cuda_getDeviceCount.
  function psb_cuda_inner_getDeviceCount() bind(c, name='getDeviceCount') &
       & result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_cuda_inner_getDeviceCount
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `getDevice`.
  ! Takes no arguments and returns a C int (presumably the index of the
  ! device currently in use -- confirm against the C sources).
  function psb_cuda_getDevice() bind(c, name='getDevice') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_cuda_getDevice
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `setDevice`.
  !   dev : device index, passed by value
  !   res : C int returned by the C routine (presumably a status code --
  !         confirm against the C sources)
  function psb_cuda_setDevice(dev) bind(c, name='setDevice') result(res)
    use iso_c_binding, only: c_int
    integer(c_int), value :: dev
    integer(c_int)        :: res
  end function psb_cuda_setDevice
end interface
|
|
|
|
|
|
interface
  ! Explicit interface to the argument-less C routine
  ! `psb_cudaCreateHandle`; invoked at the end of psb_cuda_init.
  subroutine psb_cudaCreateHandle() bind(c, name='psb_cudaCreateHandle')
  end subroutine psb_cudaCreateHandle
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `psb_cudaSetStream`.
  ! Both arguments are opaque C pointers passed by value.
  subroutine psb_cudaSetStream(handle, stream) &
       & bind(c, name='psb_cudaSetStream')
    use iso_c_binding, only: c_ptr
    type(c_ptr), value :: handle
    type(c_ptr), value :: stream
  end subroutine psb_cudaSetStream
end interface
|
|
|
|
interface
  ! Explicit interface to the argument-less C routine
  ! `psb_cudaDestroyHandle`.
  subroutine psb_cudaDestroyHandle() bind(c, name='psb_cudaDestroyHandle')
  end subroutine psb_cudaDestroyHandle
end interface
|
|
|
|
interface
  ! Explicit interface to the argument-less C routine `cudaReset`;
  ! called last by psb_cuda_exit.
  subroutine psb_cuda_innerReset() bind(c, name='cudaReset')
  end subroutine psb_cuda_innerReset
end interface
|
|
|
|
interface
  ! Explicit interface to the argument-less C routine `gpuClose`;
  ! called by psb_cuda_exit before the device reset.
  subroutine psb_cuda_innerClose() bind(c, name='gpuClose')
  end subroutine psb_cuda_innerClose
end interface
|
|
|
|
interface
  ! Explicit interface to the C routine `DeviceHasUVA`.
  ! Returns a C int; psb_cuda_DeviceHasUVA interprets the value 1 as
  ! "true".
  function psb_C_DeviceHasUVA() bind(c, name='DeviceHasUVA') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_DeviceHasUVA
end interface
|
|
|
|
interface
  ! Explicit interfaces to the C device-property queries.  Every routine
  ! below takes no arguments and returns a single C int; the Fortran-side
  ! wrappers (psb_cuda_MultiProcessors, psb_cuda_WarpSize, ...) convert
  ! the value to the library integer kind.

  ! C symbol: getGPUMultiProcessors
  function psb_C_get_MultiProcessors() &
       & bind(c, name='getGPUMultiProcessors') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_MultiProcessors

  ! C symbol: getGPUMemoryBusWidth
  function psb_C_get_MemoryBusWidth() &
       & bind(c, name='getGPUMemoryBusWidth') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_MemoryBusWidth

  ! C symbol: getGPUMemoryClockRate
  function psb_C_get_MemoryClockRate() &
       & bind(c, name='getGPUMemoryClockRate') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_MemoryClockRate

  ! C symbol: getGPUWarpSize
  function psb_C_get_WarpSize() &
       & bind(c, name='getGPUWarpSize') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_WarpSize

  ! C symbol: getGPUMaxThreadsPerMP
  function psb_C_get_MaxThreadsPerMP() &
       & bind(c, name='getGPUMaxThreadsPerMP') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_MaxThreadsPerMP

  ! C symbol: getGPUMaxRegistersPerBlock
  function psb_C_get_MaxRegistersPerBlock() &
       & bind(c, name='getGPUMaxRegistersPerBlock') result(res)
    use iso_c_binding, only: c_int
    integer(c_int) :: res
  end function psb_C_get_MaxRegistersPerBlock
end interface
|
|
interface
  ! Explicit interface to the C routine `cpyGPUNameString`.
  !
  !   cstring : caller-supplied character buffer handed to C as a plain
  !             `char *`.  psb_cuda_DeviceName expects the C side to
  !             leave a NUL-terminated string in it.
  !
  ! The dummy is declared with kind=c_char and len=1.  The previous
  ! spelling `character(c_char)` used c_char as the LENGTH selector
  ! (with default kind), which only worked because c_char happens to
  ! equal 1 on the usual compilers.
  ! intent(inout) rather than intent(out): the buffer is filled by C, and
  ! any content the caller pre-set (e.g. NUL padding) must stay defined.
  subroutine psb_C_cpy_NameString(cstring) &
       & bind(c, name='cpyGPUNameString')
    use iso_c_binding, only: c_char
    character(kind=c_char, len=1), intent(inout) :: cstring(*)
  end subroutine psb_C_cpy_NameString
end interface
|
|
|
|
! Module-private flag; read through psb_cuda_get_maybe_free_buffer and
! written through psb_cuda_set_maybe_free_buffer.  Starts out .false.
logical, private :: gpu_do_maybe_free_buffer = .false.

! Running total maintained by trackCudaAlloc (adds) and trackCudaFree
! (subtracts).  Starts at zero.
integer(psb_epk_), save :: total_cuda_mem = 0
|
|
|
|
Contains
|
|
|
|
subroutine trackCudaAlloc(data,size)
  ! Record an allocation in the module-wide counter and log it.
  !
  !   data : label identifying what was allocated (only printed)
  !   size : amount to add to total_cuda_mem
  !
  ! The message goes to unit 0 using list-directed output.
  character(len=*),  intent(in) :: data
  integer(psb_epk_), intent(in) :: size

  total_cuda_mem = size + total_cuda_mem
  write(0,*) 'Tracking cuda Alloc for data ', data, &
       & ' size ', size, ' total ', total_cuda_mem
end subroutine trackCudaAlloc
|
|
|
|
subroutine trackCudaFree(data,size)
  ! Record a release in the module-wide counter and log it.
  !
  !   data : label identifying what was released (only printed)
  !   size : amount to subtract from total_cuda_mem
  !
  ! The message goes to unit 0 using list-directed output.
  character(len=*),  intent(in) :: data
  integer(psb_epk_), intent(in) :: size

  total_cuda_mem = -size + total_cuda_mem
  write(0,*) 'Tracking cuda Free for data ', data, &
       & ' size ', size, ' total ', total_cuda_mem
end subroutine trackCudaFree
|
|
|
|
function psb_cuda_get_maybe_free_buffer() result(res)
  ! Accessor: returns the current value of the module-private flag
  ! gpu_do_maybe_free_buffer.
  logical :: res

  res = gpu_do_maybe_free_buffer
end function psb_cuda_get_maybe_free_buffer
|
|
|
|
subroutine psb_cuda_set_maybe_free_buffer(val)
  ! Mutator: stores `val` in the module-private flag
  ! gpu_do_maybe_free_buffer.
  logical, intent(in) :: val

  gpu_do_maybe_free_buffer = val
end subroutine psb_cuda_set_maybe_free_buffer
|
|
|
|
! !!!!!!!!!!!!!!!!!!!!!!
|
|
!
|
|
! Environment handling
|
|
!
|
|
! !!!!!!!!!!!!!!!!!!!!!!
|
|
|
|
|
|
subroutine psb_cuda_init(ctxt,dev)
  ! Initialise the GPU environment for the calling process.
  !
  !   ctxt : communication context; used to obtain the process index and
  !          passed to the error handler on failure
  !   dev  : optional device index.  When absent, the device is chosen
  !          as mod(process index, device count), or 0 if no device is
  !          reported.
  !
  ! Sequence: select the device, call gpuInit (psb_C_gpu_init), then
  ! initFcusparse, and finally psb_cudaCreateHandle.  A non-zero status
  ! from either init call pushes psb_err_internal_error_ and invokes the
  ! library error handler.
  use psb_penv_mod
  use psb_const_mod
  use psb_error_mod
  type(psb_ctxt_type), intent(in) :: ctxt
  integer, intent(in), optional :: dev

  integer :: np, iam, info, count, dev_
  integer(psb_ipk_) :: err_act

  info = psb_success_
  call psb_erractionsave(err_act)
#if defined(SERIAL_MPI)
  iam = 0
#else
  call psb_info(ctxt,iam,np)
#endif

  count = psb_cuda_getDeviceCount()

  ! Resolve the device index once, then initialise in a single place.
  if (present(dev)) then
    dev_ = dev
  else if (count > 0) then
    ! NOTE(review): mod() keeps the sign of iam, so a negative process
    ! index would yield a negative device index -- confirm callers never
    ! reach this point with iam < 0.
    dev_ = mod(iam,count)
  else
    dev_ = 0
  end if
  info = psb_C_gpu_init(dev_)

  ! Only bring up the cuSPARSE layer if the device init succeeded.
  if (info == 0) info = initFcusparse()
  if (info /= 0) then
    call psb_errpush(psb_err_internal_error_,'psb_cuda_init')
    goto 9999
  end if

  call psb_cudaCreateHandle()
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ctxt,err_act)

  return

end subroutine psb_cuda_init
|
|
|
|
|
|
subroutine psb_cuda_DeviceSync()
  ! Thin wrapper: forwards to psb_cudaSync.
  ! psb_cudaSync is not declared in this module's own interfaces; it is
  ! expected to be provided by one of the used modules.

  call psb_cudaSync()

end subroutine psb_cuda_DeviceSync
|
|
|
|
function psb_cuda_getDeviceCount() result(res)
  ! Returns the value reported by the C routine behind
  ! psb_cuda_inner_getDeviceCount, converted to a default integer.
  integer :: res

  res = int(psb_cuda_inner_getDeviceCount())
end function psb_cuda_getDeviceCount
|
|
|
|
subroutine psb_cuda_exit()
  ! Tear down the GPU environment.  The three steps run in this fixed
  ! order: closeFcusparse, then gpuClose, then cudaReset.
  integer :: ierr

  ! The status returned by closeFcusparse is stored but not inspected.
  ierr = closeFcusparse()

  call psb_cuda_innerClose()
  call psb_cuda_innerReset()
end subroutine psb_cuda_exit
|
|
|
|
function psb_cuda_DeviceHasUVA() result(res)
  ! Logical view of the C query DeviceHasUVA: .true. exactly when the
  ! C routine returns 1, .false. for any other value.
  logical :: res
  integer(c_int) :: flag

  flag = psb_C_DeviceHasUVA()
  res  = (flag == 1)
end function psb_cuda_DeviceHasUVA
|
|
|
|
function psb_cuda_MultiProcessors() result(res)
  ! Returns the C int from getGPUMultiProcessors converted to the
  ! library integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_MultiProcessors(), kind=psb_ipk_)
end function psb_cuda_MultiProcessors
|
|
|
|
function psb_cuda_MaxRegistersPerBlock() result(res)
  ! Returns the C int from getGPUMaxRegistersPerBlock converted to the
  ! library integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_MaxRegistersPerBlock(), kind=psb_ipk_)
end function psb_cuda_MaxRegistersPerBlock
|
|
|
|
function psb_cuda_MaxThreadsPerMP() result(res)
  ! Returns the C int from getGPUMaxThreadsPerMP converted to the
  ! library integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_MaxThreadsPerMP(), kind=psb_ipk_)
end function psb_cuda_MaxThreadsPerMP
|
|
|
|
function psb_cuda_WarpSize() result(res)
  ! Returns the C int from getGPUWarpSize converted to the library
  ! integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_WarpSize(), kind=psb_ipk_)
end function psb_cuda_WarpSize
|
|
|
|
function psb_cuda_MemoryClockRate() result(res)
  ! Returns the C int from getGPUMemoryClockRate converted to the
  ! library integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_MemoryClockRate(), kind=psb_ipk_)
end function psb_cuda_MemoryClockRate
|
|
|
|
function psb_cuda_MemoryBusWidth() result(res)
  ! Returns the C int from getGPUMemoryBusWidth converted to the
  ! library integer kind psb_ipk_.
  integer(psb_ipk_) :: res

  res = int(psb_C_get_MemoryBusWidth(), kind=psb_ipk_)
end function psb_cuda_MemoryBusWidth
|
|
|
|
function psb_cuda_MemoryPeakBandwidth() result(res)
  ! Peak memory bandwidth estimate, in MBytes/s.
  !
  !   bandwidth = 2 * ClockRate(KHz) * BusWidth(bit)
  !
  ! scaled by 0.125 (bit -> byte) and 1.d-3 (KHz -> MHz).
  real(psb_dpk_) :: res
  integer(c_int) :: bus_bits, clock_khz

  bus_bits  = psb_C_get_MemoryBusWidth()
  clock_khz = psb_C_get_MemoryClockRate()

  ! Same factor order as the formula above; the leading real constants
  ! promote the whole product to double precision.
  res = 2.d0*0.125d0*1.d-3*bus_bits*clock_khz
end function psb_cuda_MemoryPeakBandwidth
|
|
|
|
function psb_cuda_DeviceName() result(res)
  ! Returns the device name as a blank-padded Fortran string.
  !
  ! The name is fetched from C (cpyGPUNameString) into a character
  ! buffer and then converted with stringc2f.
  !
  !   res : 256-character result, blank padded on the right
  character(len=256) :: res
  ! One element more than the result length, pre-filled with NULs: as
  ! long as the C side writes no more than the 256 characters the
  ! original buffer allowed, a terminator is always present inside the
  ! buffer, even if the C routine writes nothing at all.
  character(kind=c_char,len=1) :: cstring(len(res)+1)

  cstring = c_null_char
  call psb_C_cpy_NameString(cstring)
  call stringc2f(cstring,res)
end function psb_cuda_DeviceName
|
|
|
|
|
|
subroutine stringc2f(cstring,fstring)
  ! Copy a NUL-terminated C string into a Fortran character variable.
  !
  !   cstring : array of single C characters; copying stops at the first
  !             c_null_char
  !   fstring : receives the characters; the text is truncated if it
  !             does not fit, and the remainder is filled with blanks
  !
  ! The loop bound is checked BEFORE cstring is read, so at most
  ! len(fstring) elements of cstring are ever inspected.  The earlier
  ! version tested cstring(i) first and could read one element past the
  ! end of an unterminated or over-long buffer.
  use iso_c_binding, only: c_char, c_null_char
  implicit none
  character(kind=c_char,len=1), intent(in)  :: cstring(*)
  character(len=*),             intent(out) :: fstring
  integer :: i, ncopied

  ncopied = 0
  do i = 1, len(fstring)
    if (cstring(i) == c_null_char) exit
    fstring(i:i) = cstring(i)
    ncopied = i
  end do

  ! Blank-fill whatever was not copied (zero-length when the target is
  ! already full).
  fstring(ncopied+1:) = ' '

  return
end subroutine stringc2f
|
|
|
|
end module psb_cuda_env_mod
|