You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
psblas3/ext/impl/psb_s_cp_hdia_from_coo.f90

223 lines
7.7 KiB
Fortran

! Parallel Sparse BLAS GPU plugin
! (C) Copyright 2013
!
! Salvatore Filippone
! Alessandro Fanfarillo
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
subroutine psb_s_cp_hdia_from_coo(a,b,info)
use psb_base_mod
use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_from_coo
implicit none
class(psb_s_hdia_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
!locals
type(psb_s_coo_sparse_mat) :: tmp
info = psb_success_
if (b%is_dev()) call b%sync()
if (b%is_by_rows()) then
call inner_cp_hdia_from_coo(a,b,info)
if (info /= psb_success_) goto 9999
else
call b%cp_to_coo(tmp,info)
if (info /= psb_success_) goto 9999
if (.not.tmp%is_by_rows()) call tmp%fix(info)
if (info /= psb_success_) goto 9999
call inner_cp_hdia_from_coo(a,tmp,info)
if (info /= psb_success_) goto 9999
call tmp%free()
end if
call a%set_host()
return
9999 continue
info = psb_err_alloc_dealloc_
return
contains
subroutine inner_cp_hdia_from_coo(a,tmp,info)
use psb_base_mod
use psi_ext_util_mod
implicit none
class(psb_s_hdia_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: tmp
integer(psb_ipk_), intent(out) :: info
!locals
integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w
integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:)
integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,&
& ib, ir, kfirst, klast1, hackfirst, hacknext, nzout
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name
logical, parameter :: debug=.false.
nr = tmp%get_nrows()
nc = tmp%get_ncols()
nza = tmp%get_nzeros()
! If it is sorted then we can lessen memory impact
a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat
hacksize = a%hacksize
a%nhacks = (nr+hacksize-1)/hacksize
nhacks = a%nhacks
ndiag = nr+nc-1
if (info == psb_success_) call psb_realloc(nr,irsz,info)
if (info == psb_success_) call psb_realloc(ndiag,d,info)
if (info == psb_success_) call psb_realloc(ndiag,offset,info)
if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info)
if (info /= psb_success_) return
irsz = 0
do k=1,nza
ir = tmp%ia(k)
irsz(ir) = irsz(ir)+1
end do
a%nzeros = 0
d = 0
iszd = 0
a%hackOffsets(1)=0
klast1 = 1
do k=1, nhacks
i = (k-1)*hacksize + 1
ib = min(hacksize,nr-i+1)
kfirst = klast1
klast1 = kfirst + sum(irsz(i:i+ib-1))
! klast1 points to last element of chunk plus 1
if (debug) then
write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr
write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1)
write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1)
end if
call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
& tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
& nd, d, offset, info, initd=.false., cleard=.true.)
iszd = iszd + nd
a%hackOffsets(k+1)=iszd
if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd
if (debug) write(*,*) 'offset ', offset(1:nd)
end do
if (debug) then
write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd
write(*,*) 'Hackoffsets ',a%hackOffsets(:)
end if
if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info)
if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info)
if (info /= psb_success_) return
klast1 = 1
!
! Second run: copy elements
!
do k=1, nhacks
i = (k-1)*hacksize + 1
ib = min(hacksize,nr-i+1)
kfirst = klast1
klast1 = kfirst + sum(irsz(i:i+ib-1))
! klast1 points to last element of chunk plus 1
hackfirst = a%hackoffsets(k)
hacknext = a%hackoffsets(k+1)
call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),&
& tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
& nd, d, a%diaOffsets(hackfirst+1:hacknext), info, &
& initd=.false., cleard=.false.)
if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext)
call psi_s_xtr_dia_from_coo(nr,nc,(klast1-kfirst),&
& tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),&
& tmp%val(kfirst:klast1-1), &
& d,hacksize,(hacknext-hackfirst),&
& a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,&
& initdata=.true.,rdisp=(i-1))
call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),&
& a%diaOffsets(hackfirst+1:hacknext),nzout)
a%nzeros = a%nzeros + nzout
call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext))
end do
if (debug) then
write(*,*) 'NZEROS: ',a%nzeros, nza
write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd)
write(*,*) 'values: '
j=0
do k=1,nhacks
write(*,*) 'Hack No. ',k
do i=1,hacksize*(iszd/nhacks)
j = j + 1
write(*,*) j, a%val(j)
end do
end do
end if
end subroutine inner_cp_hdia_from_coo
subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz)
implicit none
integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:)
integer(psb_ipk_), intent(out) :: nz
!
integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1
nz = 0
nrcmdisp = min(nr-rdisp,nc-rdisp)
rdisp1 = 1-rdisp
do j=1, ncd
if (offsets(j)>=0) then
ir1 = 1
! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_)
ir2 = min(nrd, nrcmdisp - offsets(j))
else
! ir1 = max(1,1-offsets(j)-rdisp_)
ir1 = max(1, rdisp1 - offsets(j))
ir2 = min(nrd, nrcmdisp)
end if
nz = nz + (ir2-ir1+1)
end do
end subroutine countnz
subroutine cleand(nr,nd,d,offset)
implicit none
integer(psb_ipk_), intent(in) :: nr,nd,offset(:)
integer(psb_ipk_), intent(inout) :: d(:)
integer(psb_ipk_) :: i,id
do i=1,nd
id = offset(i) + nr
d(id) = 0
end do
end subroutine cleand
end subroutine psb_s_cp_hdia_from_coo