You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/cuda/psb_cuda_env_mod.F90

351 lines
10 KiB
Fortran

! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
module psb_cuda_env_mod
use psb_const_mod
use iso_c_binding
use base_cusparse_mod
! interface psb_cuda_init
! module procedure psb_cuda_init
! end interface
use core_mod
interface
function psb_cudaGetHandle() &
& result(res) bind(c,name='psb_cudaGetHandle')
use iso_c_binding
type(c_ptr) :: res
end function psb_cudaGetHandle
end interface
interface
function psb_cudaGetStream() &
& result(res) bind(c,name='psb_cudaGetStream')
use iso_c_binding
type(c_ptr) :: res
end function psb_cudaGetStream
end interface
interface
function psb_C_gpu_init(dev) &
& result(res) bind(c,name='gpuInit')
use iso_c_binding
integer(c_int),value :: dev
integer(c_int) :: res
end function psb_C_gpu_init
end interface
interface
function psb_cuda_inner_getDeviceCount() &
& result(res) bind(c,name='getDeviceCount')
use iso_c_binding
integer(c_int) :: res
end function psb_cuda_inner_getDeviceCount
end interface
interface
function psb_cuda_getDevice() &
& result(res) bind(c,name='getDevice')
use iso_c_binding
integer(c_int) :: res
end function psb_cuda_getDevice
end interface
interface
function psb_cuda_setDevice(dev) &
& result(res) bind(c,name='setDevice')
use iso_c_binding
integer(c_int), value :: dev
integer(c_int) :: res
end function psb_cuda_setDevice
end interface
interface
subroutine psb_cudaCreateHandle() &
& bind(c,name='psb_cudaCreateHandle')
use iso_c_binding
end subroutine psb_cudaCreateHandle
end interface
interface
subroutine psb_cudaSetStream(handle,stream) &
& bind(c,name='psb_cudaSetStream')
use iso_c_binding
type(c_ptr), value :: handle, stream
end subroutine psb_cudaSetStream
end interface
interface
subroutine psb_cudaDestroyHandle() &
& bind(c,name='psb_cudaDestroyHandle')
use iso_c_binding
end subroutine psb_cudaDestroyHandle
end interface
interface
subroutine psb_cuda_innerReset() &
& bind(c,name='cudaReset')
use iso_c_binding
end subroutine psb_cuda_innerReset
end interface
interface
subroutine psb_cuda_innerClose() &
& bind(c,name='gpuClose')
use iso_c_binding
end subroutine psb_cuda_innerClose
end interface
interface
function psb_C_DeviceHasUVA() &
& result(res) bind(c,name='DeviceHasUVA')
use iso_c_binding
integer(c_int) :: res
end function psb_C_DeviceHasUVA
end interface
interface
function psb_C_get_MultiProcessors() &
& result(res) bind(c,name='getGPUMultiProcessors')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_MultiProcessors
function psb_C_get_MemoryBusWidth() &
& result(res) bind(c,name='getGPUMemoryBusWidth')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_MemoryBusWidth
function psb_C_get_MemoryClockRate() &
& result(res) bind(c,name='getGPUMemoryClockRate')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_MemoryClockRate
function psb_C_get_WarpSize() &
& result(res) bind(c,name='getGPUWarpSize')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_WarpSize
function psb_C_get_MaxThreadsPerMP() &
& result(res) bind(c,name='getGPUMaxThreadsPerMP')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_MaxThreadsPerMP
function psb_C_get_MaxRegistersPerBlock() &
& result(res) bind(c,name='getGPUMaxRegistersPerBlock')
use iso_c_binding
integer(c_int) :: res
end function psb_C_get_MaxRegistersPerBlock
end interface
interface
subroutine psb_C_cpy_NameString(cstring) &
& bind(c,name='cpyGPUNameString')
use iso_c_binding
character(c_char) :: cstring(*)
end subroutine psb_C_cpy_NameString
end interface
logical, private :: gpu_do_maybe_free_buffer = .false.
integer(psb_epk_), save :: total_cuda_mem=0
Contains
subroutine trackCudaAlloc(data,size)
integer(psb_epk_), intent(in) :: size
character(len=*), intent(in) :: data
total_cuda_mem = total_cuda_mem + size
write(0,*) 'Tracking cuda Alloc for data ',&
& data,' size ',size,' total ',total_cuda_mem
end subroutine trackCudaAlloc
subroutine trackCudaFree(data,size)
integer(psb_epk_), intent(in) :: size
character(len=*), intent(in) :: data
total_cuda_mem = total_cuda_mem - size
write(0,*) 'Tracking cuda Free for data ',&
& data,' size ',size,' total ',total_cuda_mem
end subroutine trackCudaFree
function psb_cuda_get_maybe_free_buffer() result(res)
logical :: res
res = gpu_do_maybe_free_buffer
end function psb_cuda_get_maybe_free_buffer
subroutine psb_cuda_set_maybe_free_buffer(val)
logical, intent(in) :: val
gpu_do_maybe_free_buffer = val
end subroutine psb_cuda_set_maybe_free_buffer
! !!!!!!!!!!!!!!!!!!!!!!
!
! Environment handling
!
! !!!!!!!!!!!!!!!!!!!!!!
subroutine psb_cuda_init(ctxt,dev)
use psb_penv_mod
use psb_const_mod
use psb_error_mod
type(psb_ctxt_type), intent(in) :: ctxt
integer, intent(in), optional :: dev
integer :: np, npavail, iam, info, count, dev_
Integer(Psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
#if defined(SERIAL_MPI)
iam = 0
#else
call psb_info(ctxt,iam,np)
#endif
count = psb_cuda_getDeviceCount()
if (present(dev)) then
info = psb_C_gpu_init(dev)
else
if (count >0) then
dev_ = mod(iam,count)
else
dev_ = 0
end if
info = psb_C_gpu_init(dev_)
end if
if (info == 0) info = initFcusparse()
if (info /= 0) then
call psb_errpush(psb_err_internal_error_,'psb_cuda_init')
goto 9999
end if
call psb_cudaCreateHandle()
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_cuda_init
subroutine psb_cuda_DeviceSync()
call psb_cudaSync()
end subroutine psb_cuda_DeviceSync
function psb_cuda_getDeviceCount() result(res)
integer :: res
res = psb_cuda_inner_getDeviceCount()
end function psb_cuda_getDeviceCount
subroutine psb_cuda_exit()
integer :: res
res = closeFcusparse()
call psb_cuda_innerClose()
call psb_cuda_innerReset()
end subroutine psb_cuda_exit
function psb_cuda_DeviceHasUVA() result(res)
logical :: res
res = (psb_C_DeviceHasUVA() == 1)
end function psb_cuda_DeviceHasUVA
function psb_cuda_MultiProcessors() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_MultiProcessors()
end function psb_cuda_MultiProcessors
function psb_cuda_MaxRegistersPerBlock() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_MaxRegistersPerBlock()
end function psb_cuda_MaxRegistersPerBlock
function psb_cuda_MaxThreadsPerMP() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_MaxThreadsPerMP()
end function psb_cuda_MaxThreadsPerMP
function psb_cuda_WarpSize() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_WarpSize()
end function psb_cuda_WarpSize
function psb_cuda_MemoryClockRate() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_MemoryClockRate()
end function psb_cuda_MemoryClockRate
function psb_cuda_MemoryBusWidth() result(res)
integer(psb_ipk_) :: res
res = psb_C_get_MemoryBusWidth()
end function psb_cuda_MemoryBusWidth
function psb_cuda_MemoryPeakBandwidth() result(res)
real(psb_dpk_) :: res
! Formula here: 2*ClockRate(KHz)*BusWidth(bit)
! normalization: bit/byte, KHz/MHz
! output: MBytes/s
res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate()
end function psb_cuda_MemoryPeakBandwidth
function psb_cuda_DeviceName() result(res)
character(len=256) :: res
character :: cstring(256)
call psb_C_cpy_NameString(cstring)
call stringc2f(cstring,res)
end function psb_cuda_DeviceName
subroutine stringc2f(cstring,fstring)
character(c_char) :: cstring(*)
character(len=*) :: fstring
integer :: i
i = 1
do
if (cstring(i) == c_null_char) exit
if (i > len(fstring)) exit
fstring(i:i) = cstring(i)
i = i + 1
end do
do
if (i > len(fstring)) exit
fstring(i:i) = " "
i = i + 1
end do
return
end subroutine stringc2f
end module psb_cuda_env_mod