Updated docs.

rename-cuda
sfilippone 1 year ago
parent 9b713c177b
commit 655c86caed

@ -1,4 +1,4 @@
PSBLAS library, version 3.8 PSBLAS library, version 3.9
=========================== ===========================
The architecture of the Fortran 2003 sparse BLAS is described in: The architecture of the Fortran 2003 sparse BLAS is described in:
@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface, >linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997. >ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previously separate:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING INSTALLING
---------- ----------
@ -61,6 +70,11 @@ prerequisites (see also SERIAL below):
specify `--with-amd` (see `./configure --help` for more details). specify `--with-amd` (see `./configure --help` for more details).
We use the C interface to AMD. We use the C interface to AMD.
5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for.
The configure script will generate a Make.inc file suitable for building The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding with their default names; if they are in unusual places consider adding

@ -0,0 +1,21 @@
(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -631,7 +631,7 @@ program pdgenmv
stop stop
endif endif
if(psb_get_errstatus() /= 0) goto 9999 if(psb_get_errstatus() /= 0) goto 9999
name='pdegenmv-gpu' name='pdegenmv-cuda'
! !
! Hello world ! Hello world
! !
@ -974,7 +974,7 @@ contains
if (iam == 0) then if (iam == 0) then
write(*,*) 'CPU side format?' write(*,*) 'CPU side format?'
read(psb_inp_unit,*) acfmt read(psb_inp_unit,*) acfmt
write(*,*) 'GPU side format?' write(*,*) 'CUDA side format?'
read(psb_inp_unit,*) agfmt read(psb_inp_unit,*) agfmt
write(*,*) 'Size of discretization cube?' write(*,*) 'Size of discretization cube?'
read(psb_inp_unit,*) idim read(psb_inp_unit,*) idim

@ -548,7 +548,7 @@ program pdgenmv
use psb_util_mod use psb_util_mod
use psb_ext_mod use psb_ext_mod
#ifdef HAVE_GPU #ifdef HAVE_GPU
use psb_gpu_mod use psb_cuda_mod
#endif #endif
use psb_s_pde3d_mod use psb_s_pde3d_mod
implicit none implicit none
@ -570,8 +570,8 @@ program pdgenmv
! dense matrices ! dense matrices
type(psb_s_vect_type), target :: xv,bv, xg, bg type(psb_s_vect_type), target :: xv,bv, xg, bg
#ifdef HAVE_GPU #ifdef HAVE_GPU
type(psb_s_vect_gpu) :: vmold type(psb_s_vect_cuda) :: vmold
type(psb_i_vect_gpu) :: imold type(psb_i_vect_cuda) :: imold
#endif #endif
real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) real(psb_spk_), allocatable :: x1(:), x2(:), x0(:)
! blacs parameters ! blacs parameters
@ -589,14 +589,14 @@ program pdgenmv
type(psb_s_dia_sparse_mat), target :: adia type(psb_s_dia_sparse_mat), target :: adia
type(psb_s_hdia_sparse_mat), target :: ahdia type(psb_s_hdia_sparse_mat), target :: ahdia
#ifdef HAVE_GPU #ifdef HAVE_GPU
type(psb_s_elg_sparse_mat), target :: aelg type(psb_s_cuda_elg_sparse_mat), target :: aelg
type(psb_s_csrg_sparse_mat), target :: acsrg type(psb_s_cuda_csrg_sparse_mat), target :: acsrg
#if CUDA_SHORT_VERSION <= 10 #if CUDA_SHORT_VERSION <= 10
type(psb_s_hybg_sparse_mat), target :: ahybg type(psb_s_cuda_hybg_sparse_mat), target :: ahybg
#endif #endif
type(psb_s_hlg_sparse_mat), target :: ahlg type(psb_s_cuda_hlg_sparse_mat), target :: ahlg
type(psb_s_dnsg_sparse_mat), target :: adnsg type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag
type(psb_s_hdiag_sparse_mat), target :: ahdiag type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg
#endif #endif
class(psb_s_base_sparse_mat), pointer :: agmold, acmold class(psb_s_base_sparse_mat), pointer :: agmold, acmold
! other variables ! other variables
@ -613,7 +613,10 @@ program pdgenmv
call psb_info(ctxt,iam,np) call psb_info(ctxt,iam,np)
#ifdef HAVE_GPU #ifdef HAVE_GPU
call psb_gpu_init(ctxt) call psb_cuda_init(ctxt)
#endif
#ifdef HAVE_RSB
call psb_rsb_init()
#endif #endif
if (iam < 0) then if (iam < 0) then
@ -622,7 +625,7 @@ program pdgenmv
stop stop
endif endif
if(psb_get_errstatus() /= 0) goto 9999 if(psb_get_errstatus() /= 0) goto 9999
name='pdegenmv-gpu' name='pdegenmv-cuda'
! !
! Hello world ! Hello world
! !
@ -632,7 +635,7 @@ program pdgenmv
end if end if
#ifdef HAVE_GPU #ifdef HAVE_GPU
write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName())
#endif #endif
! !
! get parameters ! get parameters
@ -752,7 +755,7 @@ program pdgenmv
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
call agpu%cscnv(info,mold=agmold) call agpu%cscnv(info,mold=agmold)
call psb_gpu_DeviceSync() call psb_cuda_DeviceSync()
t2 = psb_Wtime() -t1 t2 = psb_Wtime() -t1
call psb_amx(ctxt,t2) call psb_amx(ctxt,t2)
if (j==1) tcnvg1 = t2 if (j==1) tcnvg1 = t2
@ -789,7 +792,7 @@ program pdgenmv
end if end if
end do end do
call psb_gpu_DeviceSync() call psb_cuda_DeviceSync()
call psb_barrier(ctxt) call psb_barrier(ctxt)
tt2 = psb_wtime() - tt1 tt2 = psb_wtime() - tt1
call psb_amx(ctxt,tt2) call psb_amx(ctxt,tt2)
@ -817,7 +820,7 @@ program pdgenmv
end if end if
end do end do
call psb_gpu_DeviceSync() call psb_cuda_DeviceSync()
call psb_barrier(ctxt) call psb_barrier(ctxt)
gt2 = psb_wtime() - gt1 gt2 = psb_wtime() - gt1
call psb_amx(ctxt,gt2) call psb_amx(ctxt,gt2)
@ -919,7 +922,7 @@ program pdgenmv
#ifdef HAVE_GPU #ifdef HAVE_GPU
bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth
bdwdth = psb_gpu_MemoryPeakBandwidth() bdwdth = psb_cuda_MemoryPeakBandwidth()
write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth
#endif #endif
write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
@ -941,7 +944,7 @@ program pdgenmv
goto 9999 goto 9999
end if end if
#ifdef HAVE_GPU #ifdef HAVE_GPU
call psb_gpu_exit() call psb_cuda_exit()
#endif #endif
call psb_exit(ctxt) call psb_exit(ctxt)
stop stop
@ -965,7 +968,7 @@ contains
if (iam == 0) then if (iam == 0) then
write(*,*) 'CPU side format?' write(*,*) 'CPU side format?'
read(psb_inp_unit,*) acfmt read(psb_inp_unit,*) acfmt
write(*,*) 'GPU side format?' write(*,*) 'CUDA side format?'
read(psb_inp_unit,*) agfmt read(psb_inp_unit,*) agfmt
write(*,*) 'Size of discretization cube?' write(*,*) 'Size of discretization cube?'
read(psb_inp_unit,*) idim read(psb_inp_unit,*) idim

Loading…
Cancel
Save