From 655c86caeda10756c110adf8863280bfab85da63 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 29 Nov 2023 10:20:38 +0100 Subject: [PATCH] Updated docs. --- README.md | 16 ++++++++++++++- cuda/License-spgpu.md | 21 ++++++++++++++++++++ test/cudakern/dpdegenmv.F90 | 4 ++-- test/cudakern/spdegenmv.F90 | 39 ++++++++++++++++++++----------------- 4 files changed, 59 insertions(+), 21 deletions(-) create mode 100644 cuda/License-spgpu.md diff --git a/README.md b/README.md index a9813f5e..afab1646 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -PSBLAS library, version 3.8 +PSBLAS library, version 3.9 =========================== The architecture of the Fortran 2003 sparse BLAS is described in: @@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is: >linear algebra subprograms for sparse matrices: a user level interface, >ACM Trans. Math. Softw., 23(3), 379-401, 1997. +CUDA and GPU support +-------------------- +This version of PSBLAS incorporates into a single package three +entities that were previously separated: +1. PSBLAS -- the base library +2. PSBLAS-EXT -- a library providing additional storage formats +3. SPGPU -- a package of kernels for NVIDIA GPUs originally + written by Davide Barbieri and Salvatore Filippone; + see the license file cuda/License-spgpu.md INSTALLING ---------- @@ -61,6 +70,11 @@ prerequisites (see also SERIAL below): specify `--with-amd` (see `./configure --help` for more details). We use the C interface to AMD. +5. If you have CUDA available, use + --with-cuda= to specify the CUDA toolkit location + --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute + capabilities) to compile the CUDA code for. + The configure script will generate a Make.inc file suitable for building the library. 
The script is capable of recognizing the needed libraries with their default names; if they are in unusual places consider adding diff --git a/cuda/License-spgpu.md b/cuda/License-spgpu.md new file mode 100644 index 00000000..7f4b8ff4 --- /dev/null +++ b/cuda/License-spgpu.md @@ -0,0 +1,21 @@ +(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 index d2cc2172..bde57f5f 100644 --- a/test/cudakern/dpdegenmv.F90 +++ b/test/cudakern/dpdegenmv.F90 @@ -631,7 +631,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! 
@@ -974,7 +974,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 index 1c7d646f..9644d8c7 100644 --- a/test/cudakern/spdegenmv.F90 +++ b/test/cudakern/spdegenmv.F90 @@ -548,7 +548,7 @@ program pdgenmv use psb_util_mod use psb_ext_mod #ifdef HAVE_GPU - use psb_gpu_mod + use psb_cuda_mod #endif use psb_s_pde3d_mod implicit none @@ -570,8 +570,8 @@ program pdgenmv ! dense matrices type(psb_s_vect_type), target :: xv,bv, xg, bg #ifdef HAVE_GPU - type(psb_s_vect_gpu) :: vmold - type(psb_i_vect_gpu) :: imold + type(psb_s_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold #endif real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) ! blacs parameters @@ -589,14 +589,14 @@ program pdgenmv type(psb_s_dia_sparse_mat), target :: adia type(psb_s_hdia_sparse_mat), target :: ahdia #ifdef HAVE_GPU - type(psb_s_elg_sparse_mat), target :: aelg - type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_cuda_elg_sparse_mat), target :: aelg + type(psb_s_cuda_csrg_sparse_mat), target :: acsrg #if CUDA_SHORT_VERSION <= 10 - type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_cuda_hybg_sparse_mat), target :: ahybg #endif - type(psb_s_hlg_sparse_mat), target :: ahlg - type(psb_s_dnsg_sparse_mat), target :: adnsg - type(psb_s_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_hlg_sparse_mat), target :: ahlg + type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg #endif class(psb_s_base_sparse_mat), pointer :: agmold, acmold ! 
other variables @@ -613,7 +613,10 @@ program pdgenmv call psb_info(ctxt,iam,np) #ifdef HAVE_GPU - call psb_gpu_init(ctxt) + call psb_cuda_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() #endif if (iam < 0) then @@ -622,7 +625,7 @@ program pdgenmv stop endif if(psb_get_errstatus() /= 0) goto 9999 - name='pdegenmv-gpu' + name='pdegenmv-cuda' ! ! Hello world ! @@ -632,7 +635,7 @@ program pdgenmv end if #ifdef HAVE_GPU write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() - write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) #endif ! ! get parameters @@ -752,7 +755,7 @@ program pdgenmv call psb_barrier(ctxt) t1 = psb_wtime() call agpu%cscnv(info,mold=agmold) - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() t2 = psb_Wtime() -t1 call psb_amx(ctxt,t2) if (j==1) tcnvg1 = t2 @@ -789,7 +792,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) tt2 = psb_wtime() - tt1 call psb_amx(ctxt,tt2) @@ -817,7 +820,7 @@ program pdgenmv end if end do - call psb_gpu_DeviceSync() + call psb_cuda_DeviceSync() call psb_barrier(ctxt) gt2 = psb_wtime() - gt1 call psb_amx(ctxt,gt2) @@ -919,7 +922,7 @@ program pdgenmv #ifdef HAVE_GPU bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth - bdwdth = psb_gpu_MemoryPeakBandwidth() + bdwdth = psb_cuda_MemoryPeakBandwidth() write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth #endif write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() @@ -941,7 +944,7 @@ program pdgenmv goto 9999 end if #ifdef HAVE_GPU - call psb_gpu_exit() + call psb_cuda_exit() #endif call psb_exit(ctxt) stop @@ -965,7 +968,7 @@ contains if (iam == 0) then write(*,*) 'CPU side format?' 
read(psb_inp_unit,*) acfmt - write(*,*) 'GPU side format?' + write(*,*) 'CUDA side format?' read(psb_inp_unit,*) agfmt write(*,*) 'Size of discretization cube?' read(psb_inp_unit,*) idim