You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
176 lines
6.0 KiB
Fortran
176 lines
6.0 KiB
Fortran
! Parallel Sparse BLAS GPU plugin
|
|
! (C) Copyright 2013
|
|
!
|
|
! Salvatore Filippone
|
|
! Alessandro Fanfarillo
|
|
!
|
|
! Redistribution and use in source and binary forms, with or without
|
|
! modification, are permitted provided that the following conditions
|
|
! are met:
|
|
! 1. Redistributions of source code must retain the above copyright
|
|
! notice, this list of conditions and the following disclaimer.
|
|
! 2. Redistributions in binary form must reproduce the above copyright
|
|
! notice, this list of conditions, and the following disclaimer in the
|
|
! documentation and/or other materials provided with the distribution.
|
|
! 3. The name of the PSBLAS group or the names of its contributors may
|
|
! not be used to endorse or promote products derived from this
|
|
! software without specific written permission.
|
|
!
|
|
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
|
|
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
! POSSIBILITY OF SUCH DAMAGE.
|
|
!
|
|
|
|
|
|
module psi_i_ext_util_mod
|
|
|
|
use psb_base_mod, only : psb_ipk_
|
|
!
|
|
! Hack size for HLL format.
|
|
!
|
|
integer(psb_ipk_), parameter :: psb_hksz_def_ = 32
|
|
integer(psb_ipk_), private, save :: psb_hksz = psb_hksz_def_
|
|
logical, private, save :: psb_hll_use_vector = .true.
|
|
contains
|
|
|
|
function psi_get_hksz() result(res)
|
|
implicit none
|
|
integer(psb_ipk_) :: res
|
|
res = psb_hksz
|
|
end function psi_get_hksz
|
|
|
|
subroutine psi_set_hksz(size)
|
|
implicit none
|
|
integer(psb_ipk_), intent(in) :: size
|
|
if (size > 0) psb_hksz = size
|
|
end subroutine psi_set_hksz
|
|
|
|
subroutine psi_set_hll_vector(val)
|
|
implicit none
|
|
logical, optional :: val
|
|
if (present(val)) then
|
|
psb_hll_use_vector = val
|
|
else
|
|
psb_hll_use_vector = .true.
|
|
end if
|
|
|
|
end subroutine psi_set_hll_vector
|
|
|
|
function psi_get_hll_vector() result(res)
|
|
implicit none
|
|
logical :: res
|
|
|
|
res = psb_hll_use_vector
|
|
end function psi_get_hll_vector
|
|
|
|
|
|
!
|
|
! Compute offsets and allocation for DIAgonal storage.
|
|
! Input:
|
|
! nr,nc,nz,ia,ja: the matrix pattern in COO
|
|
! Note: This routine is designed to be called
|
|
! with either a full matrix or an horizontal stripe,
|
|
! with the COO entries sorted in row major order, hence
|
|
! it will handle the conversion of a strip, so it can
|
|
! be used by both DIA and HDIA. In both cases NR and NC
|
|
! *MUST* be the *GLOBAL* number of rows/columns, not those
|
|
! of the strips, i.e. it must be that all entris in IA <=NR
|
|
! and JA <= NC.
|
|
! Output:
|
|
! nd: number of nonzero diagonals
|
|
! d: d(k) contains the index inside offset of diagonal k,
|
|
! which is, if A(I,J) /= 0 then K=NR+J-I, or (optionally) 0.
|
|
! *MUST* be allocated on the *global* size NR+NC-1
|
|
!
|
|
! offset: for each of the ND nonzero diagonals, its offset J-I
|
|
!
|
|
! Notes: D and OFFSET together represent the set of diagonals;
|
|
! D can be used outside to quickly find which entry of OFFSET
|
|
! a given a(i,j) corresponds to, without doing a search.
|
|
!
|
|
! 1. Optionally init D vector to zeros
|
|
! 2. Walk through the NZ pairs (I,J):
|
|
! a. if it is a new diagonal add to a heap;
|
|
! b. increase its population count stored in D(J-I+NR)
|
|
! c. Keep track of maximum population count.
|
|
! 3. Go through the ND diagonals, getting them K out of the heap in order:
|
|
! a. Set offset(i) to K-NR == J-I
|
|
! b. Set D(K) = i or 0 (depending on cleard)
|
|
!
|
|
! Setting to 0 allows to reuse this function in a loop in a dry run
|
|
! to estimate the allocation size for HDIA; without settng to 0 we
|
|
! would need to zero the whole vector, resulting
|
|
! in a quadratic overall cost. Outside this subroutine, it is possible
|
|
! to zero selectively the entres in D by using the indices in OFFSET.
|
|
!
|
|
!
|
|
subroutine psi_dia_offset_from_coo(nr,nc,nz,ia,ja,nd,d,offset,info,&
|
|
& initd,cleard)
|
|
use psb_base_mod
|
|
|
|
implicit none
|
|
|
|
integer(psb_ipk_), intent(in) :: nr, nc, nz, ia(:), ja(:)
|
|
integer(psb_ipk_), intent(inout) :: d(:)
|
|
integer(psb_ipk_), intent(out) :: offset(:)
|
|
integer(psb_ipk_), intent(out) :: nd
|
|
integer(psb_ipk_), intent(out) :: info
|
|
logical, intent(in), optional :: initd,cleard
|
|
|
|
type(psb_i_heap) :: heap
|
|
integer(psb_ipk_) :: k,i,j,ir,ic, ndiag, id
|
|
logical :: initd_, cleard_
|
|
character(len=20) :: name
|
|
|
|
info = psb_success_
|
|
initd_ = .true.
|
|
if (present(initd)) initd_ = initd
|
|
cleard_ = .false.
|
|
if (present(cleard)) cleard_ = cleard
|
|
|
|
if (initd_) d(:) = 0
|
|
|
|
ndiag = nr+nc-1
|
|
if (size(d)<ndiag) then
|
|
info = -8
|
|
return
|
|
end if
|
|
call heap%init(info)
|
|
if (info /= psb_success_) return
|
|
|
|
do i=1,nz
|
|
k = nr+ja(i)-ia(i)
|
|
if (d(k) == 0) call heap%insert(k,info)
|
|
d(k) = d(k) + 1
|
|
enddo
|
|
nd = heap%howmany()
|
|
if (size(offset)<nd) then
|
|
info = -9
|
|
return
|
|
end if
|
|
if (cleard_) then
|
|
do i=1, nd
|
|
call heap%get_first(id,info)
|
|
offset(i) = id - nr
|
|
d(id) = 0
|
|
end do
|
|
else
|
|
do i=1, nd
|
|
call heap%get_first(id,info)
|
|
offset(i) = id - nr
|
|
d(id) = i
|
|
end do
|
|
end if
|
|
|
|
end subroutine psi_dia_offset_from_coo
|
|
|
|
end module psi_i_ext_util_mod
|