Merge branch 'dev-openmp' into development

master
sfilippone 1 year ago
commit 815ecd5cc1

@ -104,6 +104,7 @@ subroutine psi_bld_tmphalo(desc,info)
call desc%indxmap%l2gip(helem(1:nh),info)
if (info == psb_success_) call desc%indxmap%fnd_owner(helem(1:nh),hproc,info)
!write(0,*) 'bld_tmphalo calling set_owner',hproc(:)
if (info == psb_success_) call desc%indxmap%set_halo_owner(hproc,info)
if (info == psb_success_) call desc%indxmap%xtnd_p_adjcncy(hproc)

@ -44,7 +44,6 @@ module psb_c_qsort_mod
use psb_const_mod
interface psb_qsort
subroutine psb_cqsort(x,ix,dir,flag)
import

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -43,14 +43,13 @@
module psb_d_qsort_mod
use psb_const_mod
interface psb_bsrch
function psb_dbsrch(key,n,v) result(ipos)
function psb_dbsrch(key,n,v,dir,find) result(ipos)
import
integer(psb_ipk_) :: ipos, n
real(psb_dpk_) :: key
real(psb_dpk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
end function psb_dbsrch
end interface psb_bsrch

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -43,14 +43,13 @@
module psb_e_qsort_mod
use psb_const_mod
interface psb_bsrch
function psb_ebsrch(key,n,v) result(ipos)
function psb_ebsrch(key,n,v,dir,find) result(ipos)
import
integer(psb_ipk_) :: ipos, n
integer(psb_epk_) :: key
integer(psb_epk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
end function psb_ebsrch
end interface psb_bsrch

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -43,14 +43,13 @@
module psb_i2_qsort_mod
use psb_const_mod
interface psb_bsrch
function psb_i2bsrch(key,n,v) result(ipos)
function psb_i2bsrch(key,n,v,dir,find) result(ipos)
import
integer(psb_ipk_) :: ipos, n
integer(psb_i2pk_) :: key
integer(psb_i2pk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
end function psb_i2bsrch
end interface psb_bsrch

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -43,14 +43,13 @@
module psb_m_qsort_mod
use psb_const_mod
interface psb_bsrch
function psb_mbsrch(key,n,v) result(ipos)
function psb_mbsrch(key,n,v,dir,find) result(ipos)
import
integer(psb_ipk_) :: ipos, n
integer(psb_mpk_) :: key
integer(psb_mpk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
end function psb_mbsrch
end interface psb_bsrch

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -43,14 +43,13 @@
module psb_s_qsort_mod
use psb_const_mod
interface psb_bsrch
function psb_sbsrch(key,n,v) result(ipos)
function psb_sbsrch(key,n,v,dir,find) result(ipos)
import
integer(psb_ipk_) :: ipos, n
real(psb_spk_) :: key
real(psb_spk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
end function psb_sbsrch
end interface psb_bsrch

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -44,7 +44,6 @@ module psb_z_qsort_mod
use psb_const_mod
interface psb_qsort
subroutine psb_zqsort(x,ix,dir,flag)
import

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
vout(:) = vin(:)
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:,:) = vin(:,:)
!$omp end workshare
endif
endif
@ -991,24 +997,6 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
isz = psb_size(v)
If (len > isz) Then
#if defined(OPENMP)

@ -156,4 +156,15 @@ module psi_c_serial_mod
end subroutine psi_csctv
end interface psi_sct
interface psi_exscan
subroutine psi_c_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_), intent(in), optional :: shift
end subroutine psi_c_exscanv
end interface psi_exscan
end module psi_c_serial_mod

@ -156,4 +156,15 @@ module psi_d_serial_mod
end subroutine psi_dsctv
end interface psi_sct
interface psi_exscan
subroutine psi_d_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_), intent(in), optional :: shift
end subroutine psi_d_exscanv
end interface psi_exscan
end module psi_d_serial_mod

@ -156,4 +156,15 @@ module psi_e_serial_mod
end subroutine psi_esctv
end interface psi_sct
interface psi_exscan
subroutine psi_e_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_epk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_epk_), intent(in), optional :: shift
end subroutine psi_e_exscanv
end interface psi_exscan
end module psi_e_serial_mod

@ -156,4 +156,15 @@ module psi_i2_serial_mod
end subroutine psi_i2sctv
end interface psi_sct
interface psi_exscan
subroutine psi_i2_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_i2pk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_i2pk_), intent(in), optional :: shift
end subroutine psi_i2_exscanv
end interface psi_exscan
end module psi_i2_serial_mod

@ -156,4 +156,15 @@ module psi_m_serial_mod
end subroutine psi_msctv
end interface psi_sct
interface psi_exscan
subroutine psi_m_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_mpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_), intent(in), optional :: shift
end subroutine psi_m_exscanv
end interface psi_exscan
end module psi_m_serial_mod

@ -156,4 +156,15 @@ module psi_s_serial_mod
end subroutine psi_ssctv
end interface psi_sct
interface psi_exscan
subroutine psi_s_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_spk_), intent(in), optional :: shift
end subroutine psi_s_exscanv
end interface psi_exscan
end module psi_s_serial_mod

@ -156,4 +156,15 @@ module psi_z_serial_mod
end subroutine psi_zsctv
end interface psi_sct
interface psi_exscan
subroutine psi_z_exscanv(n,x,info,shift)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_), intent(in), optional :: shift
end subroutine psi_z_exscanv
end interface psi_exscan
end module psi_z_serial_mod

@ -488,6 +488,7 @@ contains
integer(psb_ipk_) :: iam, np
logical :: owned_
!write(0,*) 'block_g2lv2'
info = 0
ctxt = idxmap%get_ctxt()
call psb_info(ctxt,iam,np)

@ -388,7 +388,7 @@ contains
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
val = -1
hk = iand(psb_hashval(key),hmask)
if (hk == 0) then
hd = 1
@ -407,35 +407,49 @@ contains
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
!write(0,*) 'In searchinskey 1 : ', info, HashDuplicate
return
end if
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
! Note: because of the way we allocate things at CDALL
! time this is really unlikely; if we get here, we
! have at least as many halo indices as internals, which
! means we're already in trouble. But we try to keep going.
!
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
return
!$omp critical(hashsearchins)
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
! Note: because of the way we allocate things at CDALL
! time this is really unlikely; if we get here, we
! have at least as many halo indices as internals, which
! means we're already in trouble. But we try to keep going.
!
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
!return
end if
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
return
end if
end if
!$omp end critical(hashsearchins)
if (info /= HashOk) then
write(0,*) 'In searchinskey 2: ', info
return
end if
if (val > 0) return
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do
!write(0,*) 'In searchinskey 3: ', info
end subroutine psb_hash_lsearchinskey
recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info)
@ -448,7 +462,7 @@ contains
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
hk = iand(psb_hashval(key),hmask)
if (hk == 0) then
hd = 1
@ -460,7 +474,7 @@ contains
info = HashOutOfMemory
return
end if
val = -1
hash%nsrch = hash%nsrch + 1
do
hash%nacc = hash%nacc + 1
@ -469,6 +483,7 @@ contains
info = HashDuplicate
return
end if
!$OMP CRITICAL
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
@ -480,19 +495,22 @@ contains
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
return
!return
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
!return
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
return
!return
end if
end if
!$OMP END CRITICAL
if (info /= HashOk) return
if (val > 0) return
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do

@ -349,7 +349,6 @@ contains
logical :: owned_
info = 0
if (present(mask)) then
if (size(mask) < size(idxin)) then
info = -1
@ -501,19 +500,37 @@ contains
if (present(lidx)) then
if (present(mask)) then
do i=1, is
if (info /= 0) cycle
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
return
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
#endif
end if
idx(i) = ix
else
@ -525,18 +542,37 @@ contains
else if (.not.present(mask)) then
do i=1, is
if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
return
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
#endif
end if
idx(i) = ix
else
@ -549,19 +585,37 @@ contains
if (present(mask)) then
do i=1, is
if (info /= 0) cycle
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
if (info /= 0) then
info = -4
return
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
#endif
end if
idx(i) = ix
else
@ -573,18 +627,36 @@ contains
else if (.not.present(mask)) then
do i=1, is
if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idx(i))
if (ix < 0) then
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
return
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
end if
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idx(i)
idxmap%glob_to_loc(idx(i)) = ix
#endif
end if
idx(i) = ix
else
@ -641,19 +713,37 @@ contains
if (present(lidx)) then
if (present(mask)) then
do i=1, is
if (info /= 0) cycle
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
return
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
#endif
end if
idxout(i) = ix
else
@ -665,18 +755,36 @@ contains
else if (.not.present(mask)) then
do i=1, is
if (info /= 0) cycle
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = lidx(i)
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if ((ix <= idxmap%local_rows).or.(info /= 0)) then
info = -4
return
else
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
idxmap%local_cols = max(ix,idxmap%local_cols)
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
#endif
end if
idxout(i) = ix
else
@ -689,19 +797,37 @@ contains
if (present(mask)) then
do i=1, is
if (info /= 0) cycle
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
return
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
#endif
end if
idxout(i) = ix
else
@ -713,18 +839,36 @@ contains
else if (.not.present(mask)) then
do i=1, is
if (info /= 0) cycle
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
#if defined(OPENMP)
!$OMP CRITICAL(LISTINS)
ix = idxmap%glob_to_loc(idxin(i))
if (ix < 0) then
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
end if
!$OMP END CRITICAL(LISTINS)
#else
ix = idxmap%local_cols + 1
call psb_ensure_size(ix,idxmap%loc_to_glob,info,addsz=laddsz)
if (info /= 0) then
info = -4
return
end if
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
else
idxmap%local_cols = ix
idxmap%loc_to_glob(ix) = idxin(i)
idxmap%glob_to_loc(idxin(i)) = ix
end if
#endif
end if
idxout(i) = ix
else

@ -185,13 +185,17 @@ module psb_const_mod
! The up/down constant are defined in pairs having
! opposite values. We make use of this fact in the heapsort routine.
!
integer(psb_ipk_), parameter :: psb_sort_up_ = 1, psb_sort_down_ = -1
integer(psb_ipk_), parameter :: psb_lsort_up_ = 2, psb_lsort_down_ = -2
integer(psb_ipk_), parameter :: psb_asort_up_ = 3, psb_asort_down_ = -3
integer(psb_ipk_), parameter :: psb_alsort_up_ = 4, psb_alsort_down_ = -4
integer(psb_ipk_), parameter :: psb_sort_ovw_idx_ = 0, psb_sort_keep_idx_ = 1
integer(psb_ipk_), parameter :: psb_heap_resize = 200
integer(psb_ipk_), parameter :: psb_sort_up_ = 1, psb_sort_down_ = -1
integer(psb_ipk_), parameter :: psb_lsort_up_ = 2, psb_lsort_down_ = -2
integer(psb_ipk_), parameter :: psb_asort_up_ = 3, psb_asort_down_ = -3
integer(psb_ipk_), parameter :: psb_alsort_up_ = 4, psb_alsort_down_ = -4
integer(psb_ipk_), parameter :: psb_sort_ovw_idx_ = 0, psb_sort_keep_idx_ = 1
integer(psb_ipk_), parameter :: psb_heap_resize = 200
integer(psb_ipk_), parameter :: psb_find_any_ = 0
integer(psb_ipk_), parameter :: psb_find_first_ge_ = 1
integer(psb_ipk_), parameter :: psb_find_last_le_ = 2
!
! Sparse matrix constants

@ -168,6 +168,8 @@ module psb_c_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_c_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_c_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_c_coo_ensure_size
procedure, pass(a) :: tril => psb_c_coo_tril
procedure, pass(a) :: triu => psb_c_coo_triu
procedure, pass(a) :: cp_to_coo => psb_c_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_c_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_c_cp_coo_to_fmt
@ -1866,18 +1868,6 @@ module psb_c_base_mat_mod
end subroutine psb_c_fix_coo_inner
end interface
interface
subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_colmajor
end interface
interface
subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
@ -1906,6 +1896,93 @@ module psb_c_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_c_fix_coo
end interface
!
!> Function tril:
!! \memberof psb_c_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_c_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_c_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_c_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_c_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_c_coo_triu
end interface
!>
!! \memberof psb_c_coo_sparse_mat

@ -481,7 +481,11 @@ contains
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=czero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=czero
!$omp end workshare
end if
call x%set_host()
end subroutine c_base_zero

@ -168,6 +168,8 @@ module psb_d_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_d_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_d_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_d_coo_ensure_size
procedure, pass(a) :: tril => psb_d_coo_tril
procedure, pass(a) :: triu => psb_d_coo_triu
procedure, pass(a) :: cp_to_coo => psb_d_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_d_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_d_cp_coo_to_fmt
@ -1866,18 +1868,6 @@ module psb_d_base_mat_mod
end subroutine psb_d_fix_coo_inner
end interface
interface
subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_colmajor
end interface
interface
subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
@ -1906,6 +1896,93 @@ module psb_d_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_d_fix_coo
end interface
!
!> Function tril:
!! \memberof psb_d_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_d_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_d_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_d_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_d_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_d_coo_triu
end interface
!>
!! \memberof psb_d_coo_sparse_mat

@ -488,7 +488,11 @@ contains
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=dzero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=dzero
!$omp end workshare
end if
call x%set_host()
end subroutine d_base_zero

@ -417,7 +417,11 @@ contains
implicit none
class(psb_i_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=izero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=izero
!$omp end workshare
end if
call x%set_host()
end subroutine i_base_zero

@ -418,7 +418,11 @@ contains
implicit none
class(psb_l_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=lzero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=lzero
!$omp end workshare
end if
call x%set_host()
end subroutine l_base_zero

@ -168,6 +168,8 @@ module psb_s_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_s_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_s_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_s_coo_ensure_size
procedure, pass(a) :: tril => psb_s_coo_tril
procedure, pass(a) :: triu => psb_s_coo_triu
procedure, pass(a) :: cp_to_coo => psb_s_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_s_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_s_cp_coo_to_fmt
@ -1866,18 +1868,6 @@ module psb_s_base_mat_mod
end subroutine psb_s_fix_coo_inner
end interface
interface
subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_colmajor
end interface
interface
subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
@ -1906,6 +1896,93 @@ module psb_s_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_s_fix_coo
end interface
!
!> Function tril:
!! \memberof psb_s_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_s_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_s_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_s_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_s_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_s_coo_triu
end interface
!>
!! \memberof psb_s_coo_sparse_mat

@ -488,7 +488,11 @@ contains
implicit none
class(psb_s_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=szero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=szero
!$omp end workshare
end if
call x%set_host()
end subroutine s_base_zero

@ -168,6 +168,8 @@ module psb_z_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_z_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_z_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_z_coo_ensure_size
procedure, pass(a) :: tril => psb_z_coo_tril
procedure, pass(a) :: triu => psb_z_coo_triu
procedure, pass(a) :: cp_to_coo => psb_z_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_z_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_z_cp_coo_to_fmt
@ -1866,18 +1868,6 @@ module psb_z_base_mat_mod
end subroutine psb_z_fix_coo_inner
end interface
interface
subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_colmajor
end interface
interface
subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
@ -1906,6 +1896,93 @@ module psb_z_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_z_fix_coo
end interface
!
!> Function tril:
!! \memberof psb_z_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_z_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_z_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_z_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_z_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_z_coo_triu
end interface
!>
!! \memberof psb_z_coo_sparse_mat

@ -481,7 +481,11 @@ contains
implicit none
class(psb_z_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=zzero
if (allocated(x%v)) then
!$omp workshare
x%v(:)=zzero
!$omp end workshare
end if
call x%set_host()
end subroutine z_base_zero

File diff suppressed because it is too large Load Diff

@ -2189,6 +2189,9 @@ subroutine psb_c_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csc_mat_mod, psb_protect_name => psb_c_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_c_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,33 @@ subroutine psb_c_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
call psi_exscan(nc+1,a%icp,info,shift=ione)
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
call psi_exscan(nc+1,a%icp,info,shift=ione)
#endif
call a%set_host()
@ -2311,9 +2329,28 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2425,9 +2462,27 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -152,6 +152,7 @@ contains
!$omp parallel do private(i,j, acc) schedule(static)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -163,6 +164,7 @@ contains
!$omp parallel do private(i,j, acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -174,6 +176,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -189,6 +192,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -200,6 +204,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -211,6 +216,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -225,6 +231,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -236,6 +243,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -247,6 +255,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -261,6 +270,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -272,6 +282,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -283,6 +294,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = czero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -2277,7 +2289,155 @@ subroutine psb_c_csr_tril(a,l,info,&
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
@ -2347,7 +2507,7 @@ subroutine psb_c_csr_tril(a,l,info,&
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2431,6 +2591,158 @@ subroutine psb_c_csr_triu(a,u,info,&
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
@ -2499,7 +2811,7 @@ subroutine psb_c_csr_triu(a,u,info,&
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2844,6 +3156,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_c_csr_sparse_mat), intent(inout) :: a
@ -2860,12 +3175,6 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
character(len=20) :: name='c_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2907,96 +3216,37 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
endif
a%irp(:) = 0
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
call a%set_host()
end subroutine psb_c_cp_csr_from_coo
@ -3096,6 +3346,9 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_c_csr_sparse_mat), intent(inout) :: a
@ -3109,13 +3362,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3139,90 +3385,34 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_c_mv_csr_from_coo
@ -3300,9 +3490,28 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3386,9 +3595,27 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

File diff suppressed because it is too large Load Diff

@ -2189,6 +2189,9 @@ subroutine psb_d_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csc_mat_mod, psb_protect_name => psb_d_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_d_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,33 @@ subroutine psb_d_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
call psi_exscan(nc+1,a%icp,info,shift=ione)
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
call psi_exscan(nc+1,a%icp,info,shift=ione)
#endif
call a%set_host()
@ -2311,9 +2329,28 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2425,9 +2462,27 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -152,6 +152,7 @@ contains
!$omp parallel do private(i,j, acc) schedule(static)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -163,6 +164,7 @@ contains
!$omp parallel do private(i,j, acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -174,6 +176,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -189,6 +192,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -200,6 +204,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -211,6 +216,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -225,6 +231,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -236,6 +243,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -247,6 +255,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -261,6 +270,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -272,6 +282,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -283,6 +294,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = dzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -2277,7 +2289,155 @@ subroutine psb_d_csr_tril(a,l,info,&
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
@ -2347,7 +2507,7 @@ subroutine psb_d_csr_tril(a,l,info,&
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2431,6 +2591,158 @@ subroutine psb_d_csr_triu(a,u,info,&
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
@ -2499,7 +2811,7 @@ subroutine psb_d_csr_triu(a,u,info,&
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2844,6 +3156,9 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_d_csr_sparse_mat), intent(inout) :: a
@ -2860,12 +3175,6 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
character(len=20) :: name='d_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2907,96 +3216,37 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
endif
a%irp(:) = 0
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
call a%set_host()
end subroutine psb_d_cp_csr_from_coo
@ -3096,6 +3346,9 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_d_csr_sparse_mat), intent(inout) :: a
@ -3109,13 +3362,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3139,90 +3385,34 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_d_mv_csr_from_coo
@ -3300,9 +3490,28 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3386,9 +3595,27 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

File diff suppressed because it is too large Load Diff

@ -2189,6 +2189,9 @@ subroutine psb_s_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csc_mat_mod, psb_protect_name => psb_s_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_s_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,33 @@ subroutine psb_s_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
call psi_exscan(nc+1,a%icp,info,shift=ione)
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
call psi_exscan(nc+1,a%icp,info,shift=ione)
#endif
call a%set_host()
@ -2311,9 +2329,28 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2425,9 +2462,27 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -152,6 +152,7 @@ contains
!$omp parallel do private(i,j, acc) schedule(static)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -163,6 +164,7 @@ contains
!$omp parallel do private(i,j, acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -174,6 +176,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -189,6 +192,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -200,6 +204,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -211,6 +216,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -225,6 +231,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -236,6 +243,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -247,6 +255,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -261,6 +270,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -272,6 +282,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -283,6 +294,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = szero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -2277,7 +2289,155 @@ subroutine psb_s_csr_tril(a,l,info,&
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
@ -2347,7 +2507,7 @@ subroutine psb_s_csr_tril(a,l,info,&
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2431,6 +2591,158 @@ subroutine psb_s_csr_triu(a,u,info,&
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
@ -2499,7 +2811,7 @@ subroutine psb_s_csr_triu(a,u,info,&
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2844,6 +3156,9 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_s_csr_sparse_mat), intent(inout) :: a
@ -2860,12 +3175,6 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
character(len=20) :: name='s_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2907,96 +3216,37 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
endif
a%irp(:) = 0
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
call a%set_host()
end subroutine psb_s_cp_csr_from_coo
@ -3096,6 +3346,9 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_s_csr_sparse_mat), intent(inout) :: a
@ -3109,13 +3362,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3139,90 +3385,34 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_s_mv_csr_from_coo
@ -3300,9 +3490,28 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3386,9 +3595,27 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

File diff suppressed because it is too large Load Diff

@ -2189,6 +2189,9 @@ subroutine psb_z_mv_csc_from_coo(a,b,info)
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csc_mat_mod, psb_protect_name => psb_z_mv_csc_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_z_csc_sparse_mat), intent(inout) :: a
@ -2223,18 +2226,33 @@ subroutine psb_z_mv_csc_from_coo(a,b,info)
call psb_realloc(nc+1,a%icp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared)
!$OMP WORKSHARE
a%icp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
!$OMP ATOMIC UPDATE
a%icp(i+1) = a%icp(i+1) + 1
!$OMP END ATOMIC
end do
ip = 1
do i=1,nc
nrl = a%icp(i)
a%icp(i) = ip
ip = ip + nrl
!$OMP END DO
call psi_exscan(nc+1,a%icp,info,shift=ione)
!$OMP END PARALLEL
#else
a%icp(:) = 0
do k=1,nza
i = itemp(k)
a%icp(i) = a%icp(i) + 1
end do
a%icp(nc+1) = ip
call psi_exscan(nc+1,a%icp,info,shift=ione)
#endif
call a%set_host()
@ -2311,9 +2329,28 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2425,9 +2462,27 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -152,6 +152,7 @@ contains
!$omp parallel do private(i,j, acc) schedule(static)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -163,6 +164,7 @@ contains
!$omp parallel do private(i,j, acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -174,6 +176,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -189,6 +192,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -200,6 +204,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -211,6 +216,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -225,6 +231,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -236,6 +243,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -247,6 +255,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -261,6 +270,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -272,6 +282,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -283,6 +294,7 @@ contains
!$omp parallel do private(i,j,acc)
do i=1,m
acc = zzero
!$omp simd
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
@ -2277,7 +2289,155 @@ subroutine psb_z_csr_tril(a,l,info,&
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
@ -2347,7 +2507,7 @@ subroutine psb_z_csr_tril(a,l,info,&
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2431,6 +2591,158 @@ subroutine psb_z_csr_triu(a,u,info,&
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
@ -2499,7 +2811,7 @@ subroutine psb_z_csr_triu(a,u,info,&
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
@ -2844,6 +3156,9 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_z_csr_sparse_mat), intent(inout) :: a
@ -2860,12 +3175,6 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
character(len=20) :: name='z_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
@ -2907,96 +3216,37 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
endif
a%irp(:) = 0
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
call a%set_host()
end subroutine psb_z_cp_csr_from_coo
@ -3096,6 +3346,9 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none
class(psb_z_csr_sparse_mat), intent(inout) :: a
@ -3109,13 +3362,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3139,90 +3385,34 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
#if defined(OPENMP)
!$OMP PARALLEL default(shared) reduction(max:info)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) &
!$OMP private(k,i)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
call psi_exscan(nr+1,a%irp,info,shift=ione)
!$OMP END PARALLEL
#else
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
call psi_exscan(nr+1,a%irp,info,shift=ione)
#endif
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_z_mv_csr_from_coo
@ -3300,9 +3490,28 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3386,9 +3595,27 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_c_exscanv(n,x,info,shift)
use psi_c_serial_mod, psb_protect_name => psi_c_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_spk_), intent(in), optional :: shift
complex(psb_spk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = czero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_c_exscan()
else
!$OMP PARALLEL default(shared)
call inner_c_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_c_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
complex(psb_spk_), pointer :: sumb(:)
complex(psb_spk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = czero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_c_exscan
#endif
end subroutine psi_c_exscanv
subroutine psb_m_cgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_cgelp
use psb_const_mod

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_d_exscanv(n,x,info,shift)
use psi_d_serial_mod, psb_protect_name => psi_d_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_dpk_), intent(in), optional :: shift
real(psb_dpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = dzero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_d_exscan()
else
!$OMP PARALLEL default(shared)
call inner_d_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_d_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
real(psb_dpk_), pointer :: sumb(:)
real(psb_dpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = dzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_d_exscan
#endif
end subroutine psi_d_exscanv
subroutine psb_m_dgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_dgelp
use psb_const_mod

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_e_exscanv(n,x,info,shift)
use psi_e_serial_mod, psb_protect_name => psi_e_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_epk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_epk_), intent(in), optional :: shift
integer(psb_epk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = ezero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_e_exscan()
else
!$OMP PARALLEL default(shared)
call inner_e_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_e_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_epk_), pointer :: sumb(:)
integer(psb_epk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = ezero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_e_exscan
#endif
end subroutine psi_e_exscanv
subroutine psb_m_egelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_egelp
use psb_const_mod
@ -441,9 +542,9 @@ subroutine psi_eaxpby(m,n,alpha, x, beta, y, info)
integer(psb_epk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +603,8 @@ subroutine psi_eaxpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +634,106 @@ subroutine psi_eaxpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call eaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call eaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
enddo
else if (beta.eq.eone) then
!
! Do nothing!
!
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +756,7 @@ subroutine psi_eaxpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_epk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +795,105 @@ subroutine psi_eaxpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call eaxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = ezero
enddo
else if (beta.eq.eone) then
!
! Do nothing!
!
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-eone) then
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.ezero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-eone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1241,7 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.ezero) then
if (beta.eq.ezero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = ezero
enddo
@ -953,12 +1253,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1271,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1286,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1304,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1319,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1337,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1352,14 @@ subroutine eaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1445,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1463,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1478,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1496,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1511,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1529,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.ezero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1544,14 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-eone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_i2_exscanv(n,x,info,shift)
use psi_i2_serial_mod, psb_protect_name => psi_i2_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_i2pk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_i2pk_), intent(in), optional :: shift
integer(psb_i2pk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = i2zero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_i2_exscan()
else
!$OMP PARALLEL default(shared)
call inner_i2_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_i2_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_i2pk_), pointer :: sumb(:)
integer(psb_i2pk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = i2zero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_i2_exscan
#endif
end subroutine psi_i2_exscanv
subroutine psb_m_i2gelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_i2gelp
use psb_const_mod
@ -441,9 +542,9 @@ subroutine psi_i2axpby(m,n,alpha, x, beta, y, info)
integer(psb_i2pk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +603,8 @@ subroutine psi_i2axpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +634,106 @@ subroutine psi_i2axpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call i2axpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call i2axpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
enddo
else if (beta.eq.i2one) then
!
! Do nothing!
!
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +756,7 @@ subroutine psi_i2axpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_i2pk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +795,105 @@ subroutine psi_i2axpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call i2axpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = i2zero
enddo
else if (beta.eq.i2one) then
!
! Do nothing!
!
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-i2one) then
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.i2zero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-i2one) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1241,7 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.i2zero) then
if (beta.eq.i2zero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = i2zero
enddo
@ -953,12 +1253,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1271,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1286,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1304,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1319,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1337,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1352,14 @@ subroutine i2axpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1445,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1463,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1478,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1496,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1511,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1529,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.i2zero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1544,14 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-i2one) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_m_exscanv(n,x,info,shift)
use psi_m_serial_mod, psb_protect_name => psi_m_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
integer(psb_mpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_mpk_), intent(in), optional :: shift
integer(psb_mpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = mzero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_m_exscan()
else
!$OMP PARALLEL default(shared)
call inner_m_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_m_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_mpk_), pointer :: sumb(:)
integer(psb_mpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = mzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_m_exscan
#endif
end subroutine psi_m_exscanv
subroutine psb_m_mgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_mgelp
use psb_const_mod
@ -441,9 +542,9 @@ subroutine psi_maxpby(m,n,alpha, x, beta, y, info)
integer(psb_mpk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: lx, ly, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -502,7 +603,8 @@ subroutine psi_maxpbyv(m,alpha, x, beta, y, info)
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
integer(psb_ipk_) :: i
character(len=20) :: name, ch_err
name='psb_geaxpby'
info=psb_success_
@ -532,7 +634,106 @@ subroutine psi_maxpbyv(m,alpha, x, beta, y, info)
goto 9999
end if
if (m>0) call maxpby(m,ione,alpha,x,lx,beta,y,ly,info)
! if (m>0) call maxpby(m,ione,alpha,x,lx,beta,y,ly,info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
enddo
else if (beta.eq.mone) then
!
! Do nothing!
!
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
enddo
endif
else if (alpha.eq.mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -555,7 +756,7 @@ subroutine psi_maxpbyv2(m,alpha, x, beta, y, z, info)
integer(psb_mpk_), intent (in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: lx, ly, lz
integer(psb_ipk_) :: lx, ly, lz, i
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name, ch_err
@ -594,7 +795,105 @@ subroutine psi_maxpbyv2(m,alpha, x, beta, y, z, info)
goto 9999
end if
if (m>0) call maxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = mzero
enddo
else if (beta.eq.mone) then
!
! Do nothing!
!
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = beta*y(i)
enddo
endif
else if (alpha.eq.mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = x(i) + beta*y(i)
enddo
endif
else if (alpha.eq.-mone) then
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = -x(i) + beta*y(i)
enddo
endif
else
if (beta.eq.mzero) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i)
enddo
else if (beta.eq.mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + y(i)
enddo
else if (beta.eq.-mone) then
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) - y(i)
enddo
else
!$omp parallel do private(i)
do i=1,m
Z(i) = alpha*x(i) + beta*y(i)
enddo
endif
endif
call psb_erractionrestore(err_act)
return
@ -942,6 +1241,7 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (alpha.eq.mzero) then
if (beta.eq.mzero) then
do j=1, n
!$omp parallel do private(i)
do i=1,m
y(i,j) = mzero
enddo
@ -953,12 +1253,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = beta*y(i,j)
enddo
@ -969,12 +1271,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + y(i,j)
enddo
@ -982,12 +1286,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -998,12 +1304,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + y(i,j)
enddo
@ -1011,12 +1319,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1027,12 +1337,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1040,12 +1352,14 @@ subroutine maxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
y(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo
@ -1131,12 +1445,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = beta*y(i,j)
enddo
@ -1147,12 +1463,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + y(i,j)
enddo
@ -1160,12 +1478,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = x(i,j) + beta*y(i,j)
enddo
@ -1176,12 +1496,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + y(i,j)
enddo
@ -1189,12 +1511,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = -x(i,j) + beta*y(i,j)
enddo
@ -1205,12 +1529,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
if (beta.eq.mzero) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j)
enddo
enddo
else if (beta.eq.mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + y(i,j)
enddo
@ -1218,12 +1544,14 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
else if (beta.eq.-mone) then
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) - y(i,j)
enddo
enddo
else
do j=1,n
!$omp parallel do private(i)
do i=1,m
Z(i,j) = alpha*x(i,j) + beta*y(i,j)
enddo

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_s_exscanv(n,x,info,shift)
use psi_s_serial_mod, psb_protect_name => psi_s_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
real(psb_spk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
real(psb_spk_), intent(in), optional :: shift
real(psb_spk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = szero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_s_exscan()
else
!$OMP PARALLEL default(shared)
call inner_s_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_s_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
real(psb_spk_), pointer :: sumb(:)
real(psb_spk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = szero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_s_exscan
#endif
end subroutine psi_s_exscanv
subroutine psb_m_sgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_sgelp
use psb_const_mod

@ -29,6 +29,107 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
subroutine psi_z_exscanv(n,x,info,shift)
use psi_z_serial_mod, psb_protect_name => psi_z_exscanv
use psb_const_mod
use psb_error_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_), intent (inout) :: x(:)
integer(psb_ipk_), intent(out) :: info
complex(psb_dpk_), intent(in), optional :: shift
complex(psb_dpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
shift_ = shift
else
shift_ = zzero
end if
#if defined(OPENMP)
is_parallel = omp_in_parallel()
if (is_parallel) then
call inner_z_exscan()
else
!$OMP PARALLEL default(shared)
call inner_z_exscan()
!$OMP END PARALLEL
end if
#else
tp = shift_
do i=1,n
ts = x(i)
x(i) = tp
tp = tp + ts
end do
#endif
#if defined(OPENMP)
contains
subroutine inner_z_exscan()
! Note: all these variables are private, but SUMB should *really* be
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
complex(psb_dpk_), pointer :: sumb(:)
complex(psb_dpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
!$OMP SINGLE
allocate(sumb(nthreads+1))
sumb(:) = 0
!$OMP END SINGLE COPYPRIVATE(sumb)
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
idxstart = ithread*wrk + 1
else
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
idxend = min(idxstart + wrk - 1,n )
tp = zzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sumb(i) = sumb(i) + sumb(i-1)
end do
!$OMP END SINGLE
!$OMP BARRIER
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_z_exscan
#endif
end subroutine psi_z_exscanv
subroutine psb_m_zgelp(trans,iperm,x,info)
use psb_serial_mod, psb_protect_name => psb_m_zgelp
use psb_const_mod

@ -40,6 +40,7 @@
! Data Structures and Algorithms
! Addison-Wesley
!
subroutine psb_cqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_cqsort
use psb_error_mod

@ -76,64 +76,6 @@ subroutine psb_dmsort_u(x,nout,dir)
return
end subroutine psb_dmsort_u
function psb_dbsrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_dbsrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_dpk_) :: key
real(psb_dpk_) :: v(:)
integer(psb_ipk_) :: lb, ub, m, i
ipos = -1
if (n<5) then
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end if
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
lb = ub + 1
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
return
end function psb_dbsrch
function psb_dssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_dssrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_dpk_) :: key
real(psb_dpk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_dssrch
subroutine psb_dmsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_dmsort
use psb_error_mod

@ -40,6 +40,125 @@
! Data Structures and Algorithms
! Addison-Wesley
!
function psb_dbsrch(key,n,v,dir,find) result(ipos)
use psb_sort_mod, psb_protect_name => psb_dbsrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_dpk_) :: key
real(psb_dpk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
integer(psb_ipk_) :: lb, ub, m, i, k, dir_, find_
if (present(dir)) then
dir_ = dir
else
dir_ = psb_sort_up_
end if
if (present(find)) then
find_ = find
else
find_ = psb_find_any_
end if
ipos = -1
if (dir_ == psb_sort_up_) then
if (n<=5) then
do m=1,n
if (key == v(m)) then
ipos = m
exit
end if
enddo
else
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
exit
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
end if
select case(find_)
case (psb_find_any_ )
! do nothing
case (psb_find_last_le_ )
if ((m>n) .or. (m<1)) then
m = n
do while (m>=1)
if (v(m)<=key) exit
m = m - 1
end do
else
do while (m<n)
if (v(m+1)<=key) then
m=m+1
else
exit
end if
end do
end if
ipos = min(m,n)
case (psb_find_first_ge_ )
if ((m>n) .or. (m<1)) then
m = 1
do while (m<=n)
if (v(m)>=key) exit
m = m + 1
end do
else
do while (m>1)
if (v(m-1)>=key) then
m=m-1
else
exit
end if
end do
end if
ipos = max(m,1)
case default
write(0,*) 'Wrong FIND'
end select
else if (dir_ == psb_sort_down_) then
write(0,*) ' bsrch on sort down not implemented'
else
write(0,*) ' bsrch wrong DIR ',dir_,psb_sort_up_,psb_sort_down_
end if
return
end function psb_dbsrch
function psb_dssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_dssrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_dpk_) :: key
real(psb_dpk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_dssrch
subroutine psb_dqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_dqsort
use psb_error_mod

@ -131,64 +131,6 @@ subroutine psb_emsort_u(x,nout,dir)
return
end subroutine psb_emsort_u
function psb_ebsrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_ebsrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_epk_) :: key
integer(psb_epk_) :: v(:)
integer(psb_ipk_) :: lb, ub, m, i
ipos = -1
if (n<5) then
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end if
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
lb = ub + 1
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
return
end function psb_ebsrch
function psb_essrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_essrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_epk_) :: key
integer(psb_epk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_essrch
subroutine psb_emsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_emsort
use psb_error_mod

@ -40,6 +40,125 @@
! Data Structures and Algorithms
! Addison-Wesley
!
function psb_ebsrch(key,n,v,dir,find) result(ipos)
use psb_sort_mod, psb_protect_name => psb_ebsrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_epk_) :: key
integer(psb_epk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
integer(psb_ipk_) :: lb, ub, m, i, k, dir_, find_
if (present(dir)) then
dir_ = dir
else
dir_ = psb_sort_up_
end if
if (present(find)) then
find_ = find
else
find_ = psb_find_any_
end if
ipos = -1
if (dir_ == psb_sort_up_) then
if (n<=5) then
do m=1,n
if (key == v(m)) then
ipos = m
exit
end if
enddo
else
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
exit
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
end if
select case(find_)
case (psb_find_any_ )
! do nothing
case (psb_find_last_le_ )
if ((m>n) .or. (m<1)) then
m = n
do while (m>=1)
if (v(m)<=key) exit
m = m - 1
end do
else
do while (m<n)
if (v(m+1)<=key) then
m=m+1
else
exit
end if
end do
end if
ipos = min(m,n)
case (psb_find_first_ge_ )
if ((m>n) .or. (m<1)) then
m = 1
do while (m<=n)
if (v(m)>=key) exit
m = m + 1
end do
else
do while (m>1)
if (v(m-1)>=key) then
m=m-1
else
exit
end if
end do
end if
ipos = max(m,1)
case default
write(0,*) 'Wrong FIND'
end select
else if (dir_ == psb_sort_down_) then
write(0,*) ' bsrch on sort down not implemented'
else
write(0,*) ' bsrch wrong DIR ',dir_,psb_sort_up_,psb_sort_down_
end if
return
end function psb_ebsrch
function psb_essrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_essrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_epk_) :: key
integer(psb_epk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_essrch
subroutine psb_eqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_eqsort
use psb_error_mod

@ -131,64 +131,6 @@ subroutine psb_mmsort_u(x,nout,dir)
return
end subroutine psb_mmsort_u
function psb_mbsrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_mbsrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_mpk_) :: key
integer(psb_mpk_) :: v(:)
integer(psb_ipk_) :: lb, ub, m, i
ipos = -1
if (n<5) then
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end if
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
lb = ub + 1
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
return
end function psb_mbsrch
function psb_mssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_mssrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_mpk_) :: key
integer(psb_mpk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_mssrch
subroutine psb_mmsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_mmsort
use psb_error_mod

@ -40,6 +40,125 @@
! Data Structures and Algorithms
! Addison-Wesley
!
function psb_mbsrch(key,n,v,dir,find) result(ipos)
use psb_sort_mod, psb_protect_name => psb_mbsrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_mpk_) :: key
integer(psb_mpk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
integer(psb_ipk_) :: lb, ub, m, i, k, dir_, find_
if (present(dir)) then
dir_ = dir
else
dir_ = psb_sort_up_
end if
if (present(find)) then
find_ = find
else
find_ = psb_find_any_
end if
ipos = -1
if (dir_ == psb_sort_up_) then
if (n<=5) then
do m=1,n
if (key == v(m)) then
ipos = m
exit
end if
enddo
else
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
exit
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
end if
select case(find_)
case (psb_find_any_ )
! do nothing
case (psb_find_last_le_ )
if ((m>n) .or. (m<1)) then
m = n
do while (m>=1)
if (v(m)<=key) exit
m = m - 1
end do
else
do while (m<n)
if (v(m+1)<=key) then
m=m+1
else
exit
end if
end do
end if
ipos = min(m,n)
case (psb_find_first_ge_ )
if ((m>n) .or. (m<1)) then
m = 1
do while (m<=n)
if (v(m)>=key) exit
m = m + 1
end do
else
do while (m>1)
if (v(m-1)>=key) then
m=m-1
else
exit
end if
end do
end if
ipos = max(m,1)
case default
write(0,*) 'Wrong FIND'
end select
else if (dir_ == psb_sort_down_) then
write(0,*) ' bsrch on sort down not implemented'
else
write(0,*) ' bsrch wrong DIR ',dir_,psb_sort_up_,psb_sort_down_
end if
return
end function psb_mbsrch
function psb_mssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_mssrch
implicit none
integer(psb_ipk_) :: ipos, n
integer(psb_mpk_) :: key
integer(psb_mpk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_mssrch
subroutine psb_mqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_mqsort
use psb_error_mod

@ -76,64 +76,6 @@ subroutine psb_smsort_u(x,nout,dir)
return
end subroutine psb_smsort_u
function psb_sbsrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_sbsrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_spk_) :: key
real(psb_spk_) :: v(:)
integer(psb_ipk_) :: lb, ub, m, i
ipos = -1
if (n<5) then
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end if
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
lb = ub + 1
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
return
end function psb_sbsrch
function psb_sssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_sssrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_spk_) :: key
real(psb_spk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_sssrch
subroutine psb_smsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_smsort
use psb_error_mod

@ -40,6 +40,125 @@
! Data Structures and Algorithms
! Addison-Wesley
!
function psb_sbsrch(key,n,v,dir,find) result(ipos)
use psb_sort_mod, psb_protect_name => psb_sbsrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_spk_) :: key
real(psb_spk_) :: v(:)
integer(psb_ipk_), optional :: dir, find
integer(psb_ipk_) :: lb, ub, m, i, k, dir_, find_
if (present(dir)) then
dir_ = dir
else
dir_ = psb_sort_up_
end if
if (present(find)) then
find_ = find
else
find_ = psb_find_any_
end if
ipos = -1
if (dir_ == psb_sort_up_) then
if (n<=5) then
do m=1,n
if (key == v(m)) then
ipos = m
exit
end if
enddo
else
lb = 1
ub = n
do while (lb.le.ub)
m = (lb+ub)/2
if (key.eq.v(m)) then
ipos = m
exit
else if (key < v(m)) then
ub = m-1
else
lb = m + 1
end if
enddo
end if
select case(find_)
case (psb_find_any_ )
! do nothing
case (psb_find_last_le_ )
if ((m>n) .or. (m<1)) then
m = n
do while (m>=1)
if (v(m)<=key) exit
m = m - 1
end do
else
do while (m<n)
if (v(m+1)<=key) then
m=m+1
else
exit
end if
end do
end if
ipos = min(m,n)
case (psb_find_first_ge_ )
if ((m>n) .or. (m<1)) then
m = 1
do while (m<=n)
if (v(m)>=key) exit
m = m + 1
end do
else
do while (m>1)
if (v(m-1)>=key) then
m=m-1
else
exit
end if
end do
end if
ipos = max(m,1)
case default
write(0,*) 'Wrong FIND'
end select
else if (dir_ == psb_sort_down_) then
write(0,*) ' bsrch on sort down not implemented'
else
write(0,*) ' bsrch wrong DIR ',dir_,psb_sort_up_,psb_sort_down_
end if
return
end function psb_sbsrch
function psb_sssrch(key,n,v) result(ipos)
use psb_sort_mod, psb_protect_name => psb_sssrch
implicit none
integer(psb_ipk_) :: ipos, n
real(psb_spk_) :: key
real(psb_spk_) :: v(:)
integer(psb_ipk_) :: i
ipos = -1
do i=1,n
if (key.eq.v(i)) then
ipos = i
return
end if
enddo
return
end function psb_sssrch
subroutine psb_sqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_sqsort
use psb_error_mod

@ -40,6 +40,7 @@
! Data Structures and Algorithms
! Addison-Wesley
!
subroutine psb_zqsort(x,ix,dir,flag)
use psb_sort_mod, psb_protect_name => psb_zqsort
use psb_error_mod

@ -51,6 +51,9 @@
subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_cspins
use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
!....parameters...
@ -131,11 +134,117 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
block
logical :: is_in_parallel
is_in_parallel = omp_in_parallel()
if (is_in_parallel) then
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!$omp critical(spins)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!$omp end critical(spins)
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9998
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9998
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
end if
9998 continue
!write(0,*) me,' after csput',psb_errstatus_fatal()
!$omp end parallel
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal()
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9999
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
goto 9999
end if
end if
end block
#else
!write(0,*) me,' Before g2l ',psb_errstatus_fatal()
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
@ -143,7 +252,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
@ -151,11 +260,10 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
@ -177,6 +285,8 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name)
goto 9999
end if
#endif
if (info /= 0) goto 9999
endif
else if (desc_a%is_asb()) then
@ -189,9 +299,18 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
#endif
if (local_) then
#if defined(OPENMP)
!$omp workshare
#endif
ila(1:nz) = ia(1:nz)
jla(1:nz) = ja(1:nz)
#if defined(OPENMP)
!$omp end workshare
#endif
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info,&
@ -201,7 +320,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
!goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
@ -220,8 +339,12 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
end if
#if defined(OPENMP)
!$omp end parallel
#endif
else
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)

@ -51,6 +51,9 @@
subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_dspins
use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
!....parameters...
@ -131,11 +134,117 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
block
logical :: is_in_parallel
is_in_parallel = omp_in_parallel()
if (is_in_parallel) then
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!$omp critical(spins)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!$omp end critical(spins)
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9998
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9998
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
end if
9998 continue
!write(0,*) me,' after csput',psb_errstatus_fatal()
!$omp end parallel
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal()
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9999
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
goto 9999
end if
end if
end block
#else
!write(0,*) me,' Before g2l ',psb_errstatus_fatal()
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
@ -143,7 +252,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
@ -151,11 +260,10 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
@ -177,6 +285,8 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name)
goto 9999
end if
#endif
if (info /= 0) goto 9999
endif
else if (desc_a%is_asb()) then
@ -189,9 +299,18 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
#endif
if (local_) then
#if defined(OPENMP)
!$omp workshare
#endif
ila(1:nz) = ia(1:nz)
jla(1:nz) = ja(1:nz)
#if defined(OPENMP)
!$omp end workshare
#endif
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info,&
@ -201,7 +320,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
!goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
@ -220,8 +339,12 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
end if
#if defined(OPENMP)
!$omp end parallel
#endif
else
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)

@ -51,6 +51,9 @@
subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_sspins
use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
!....parameters...
@ -131,11 +134,117 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
block
logical :: is_in_parallel
is_in_parallel = omp_in_parallel()
if (is_in_parallel) then
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!$omp critical(spins)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!$omp end critical(spins)
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9998
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9998
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
end if
9998 continue
!write(0,*) me,' after csput',psb_errstatus_fatal()
!$omp end parallel
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal()
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9999
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
goto 9999
end if
end if
end block
#else
!write(0,*) me,' Before g2l ',psb_errstatus_fatal()
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
@ -143,7 +252,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
@ -151,11 +260,10 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
@ -177,6 +285,8 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name)
goto 9999
end if
#endif
if (info /= 0) goto 9999
endif
else if (desc_a%is_asb()) then
@ -189,9 +299,18 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
#endif
if (local_) then
#if defined(OPENMP)
!$omp workshare
#endif
ila(1:nz) = ia(1:nz)
jla(1:nz) = ja(1:nz)
#if defined(OPENMP)
!$omp end workshare
#endif
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info,&
@ -201,7 +320,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
!goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
@ -220,8 +339,12 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
end if
#if defined(OPENMP)
!$omp end parallel
#endif
else
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)

@ -51,6 +51,9 @@
subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_zspins
use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
!....parameters...
@ -131,11 +134,117 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
block
logical :: is_in_parallel
is_in_parallel = omp_in_parallel()
if (is_in_parallel) then
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!$omp critical(spins)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!$omp end critical(spins)
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9998
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9998
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
end if
9998 continue
!write(0,*) me,' after csput',psb_errstatus_fatal()
!$omp end parallel
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
!write(0,*) me,' Before g2l_ins ',psb_errstatus_fatal()
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
goto 9999
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
if (ila(i)<0) then
k=k+1
lila(k) = ia(i)
ljla(k) = ja(i)
lval(k) = val(i)
end if
end do
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
else
info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name)
goto 9999
end if
end if
end block
#else
!write(0,*) me,' Before g2l ',psb_errstatus_fatal()
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0))
!write(0,*) me,' after g2l_ins ',psb_errstatus_fatal(),info
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/))
@ -143,7 +252,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
end if
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
!write(0,*) me,' Before csput',psb_errstatus_fatal()
if (a%is_bld()) then
call a%csput(nz,ila,jla,val,ione,nrow,ione,ncol,info)
if (info /= psb_success_) then
@ -151,11 +260,10 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name,a_err='a%csput')
goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0
do i=1,nz
@ -177,6 +285,8 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
call psb_errpush(info,name)
goto 9999
end if
#endif
if (info /= 0) goto 9999
endif
else if (desc_a%is_asb()) then
@ -189,9 +299,18 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/))
goto 9999
end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol,nnl,k)
#endif
if (local_) then
#if defined(OPENMP)
!$omp workshare
#endif
ila(1:nz) = ia(1:nz)
jla(1:nz) = ja(1:nz)
#if defined(OPENMP)
!$omp end workshare
#endif
else
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
if (info == 0) call desc_a%indxmap%g2l(ja(1:nz),jla(1:nz),info,&
@ -201,7 +320,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput')
goto 9999
!goto 9999
end if
if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0)
@ -220,8 +339,12 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (k /= nnl) write(0,*) name,' Wrong conversion?',k,nnl
call a%rmta%csput(nnl,lila,ljla,lval,1_psb_lpk_,desc_a%get_global_rows(),&
& 1_psb_lpk_,desc_a%get_global_rows(),info)
end if
end if
end if
#if defined(OPENMP)
!$omp end parallel
#endif
else
info = psb_err_invalid_cd_state_
call psb_errpush(info,name)

@ -0,0 +1,56 @@
INSTALLDIR=../..
INCDIR=$(INSTALLDIR)/include
MODDIR=$(INSTALLDIR)/modules/
include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(INSTALLDIR)/lib
PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
#
CCOPT= -g
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG).
EXEDIR=./runs
all: runsd psb_tomp #psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d
runsd:
(if test ! -d runs ; then mkdir runs; fi)
psb_tomp: psb_tomp.o
$(FLINK) psb_tomp.o -o psb_tomp $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_tomp $(EXEDIR)
psb_d_pde3d: psb_d_pde3d.o
$(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde3d $(EXEDIR)
psb_s_pde3d: psb_s_pde3d.o
$(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_s_pde3d $(EXEDIR)
psb_d_pde2d: psb_d_pde2d.o
$(FLINK) psb_d_pde2d.o -o psb_d_pde2d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde2d $(EXEDIR)
psb_s_pde2d: psb_s_pde2d.o
$(FLINK) psb_s_pde2d.o -o psb_s_pde2d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_s_pde2d $(EXEDIR)
clean:
/bin/rm -f psb_tomp.o psb_d_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d
verycleanlib:
(cd ../..; make veryclean)
lib:
(cd ../../; make library)

File diff suppressed because it is too large Load Diff

@ -156,6 +156,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod
use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
!
! Discretizes the partial differential equation
!
@ -192,7 +195,7 @@ contains
type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat) :: acsr
real(psb_dpk_) :: zt(nb),x,y,z
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz
integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
! For 2D partition
@ -204,8 +207,7 @@ contains
! Process grid
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
real(psb_dpk_), allocatable :: val(:)
integer(psb_lpk_), allocatable :: myidx(:)
! deltah dimension of each grid cell
! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2
@ -391,7 +393,6 @@ contains
end if
end block
case default
write(psb_err_unit,*) iam, 'Initialization error: should not get here'
info = -1
@ -418,94 +419,114 @@ contains
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt)
t1 = psb_wtime()
do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
zt(k) = f_(x,y)
! internal point: build discretization
!
! term depending on (x-1,y)
!
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1)
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(done,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
!$omp parallel shared(deltah,myidx,a,desc_a)
!
block
integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth
integer(psb_lpk_) :: glob_row
integer(psb_lpk_), allocatable :: irow(:),icol(:)
real(psb_dpk_), allocatable :: val(:)
real(psb_dpk_) :: x,y, zt(nb)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
!goto 9999
endif
!$omp do schedule(dynamic)
!
do ii=1, nlr,nb
if(info /= psb_success_) cycle
ib = min(nb,nlr-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
zt(k) = f_(x,y)
! internal point: build discretization
!
! term depending on (x-1,y)
!
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1)
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(done,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1
if(info /= psb_success_) then
info=psb_err_from_subroutine_
@ -514,8 +535,6 @@ contains
goto 9999
end if
deallocate(val,irow,icol)
call psb_barrier(ctxt)
t1 = psb_wtime()
call psb_cdasb(desc_a,info,mold=imold)
@ -579,6 +598,9 @@ program psb_d_pde2d
use psb_krylov_mod
use psb_util_mod
use psb_d_pde2d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! input parameters
@ -600,7 +622,7 @@ program psb_d_pde2d
type(psb_d_vect_type) :: xxv,bv
! parallel environment
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np
integer(psb_ipk_) :: iam, np, nth
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -625,7 +647,16 @@ program psb_d_pde2d
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
@ -750,6 +781,8 @@ program psb_d_pde2d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
@ -790,7 +823,8 @@ contains
!
! get iteration parameters from standard input
!
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
& itmax,itrace,irst,ipart,parms)
type(psb_ctxt_type) :: ctxt
character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart

@ -172,6 +172,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod
use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
!
! Discretizes the partial differential equation
!
@ -220,8 +223,7 @@ contains
! Process grid
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
real(psb_dpk_), allocatable :: val(:)
integer(psb_lpk_), allocatable :: myidx(:)
! deltah dimension of each grid cell
! deltat discretization time
real(psb_dpk_) :: deltah, sqdeltah, deltah2
@ -377,7 +379,6 @@ contains
!
call psb_cdall(ctxt,desc_a,info,vl=myidx)
!
! Specify process topology
!
@ -447,69 +448,79 @@ contains
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt)
t1 = psb_wtime()
do ii=1, nlr, nb
ib = min(nb,nlr-ii+1)
!ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
!$omp parallel shared(deltah,myidx,a,desc_a)
!
block
integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
integer(psb_lpk_) :: glob_row
integer(psb_lpk_), allocatable :: irow(:),icol(:)
real(psb_dpk_), allocatable :: val(:)
real(psb_dpk_) :: x,y,z, zt(nb)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
!goto 9999
endif
!$omp do schedule(dynamic)
!
do ii=1, nlr, nb
if(info /= psb_success_) cycle
ib = min(nb,nlr-ii+1)
!ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
@ -546,14 +557,22 @@ contains
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1
if(info /= psb_success_) then
@ -569,113 +588,6 @@ contains
call psb_cdasb(desc_a,info,mold=imold)
tcdasb = psb_wtime()-t1
if (.false.) then
!
! Add extra rows to test remote build.
!
block
integer(psb_ipk_) :: ks, i
ks = desc_a%get_local_cols()-desc_a%get_local_rows()
if (ks > 0) ks = max(1,ks / 10)
mysz = nlr+ks
call psb_realloc(mysz,myidx,info)
do i=nlr+1, mysz
myidx(i) = i
end do
call desc_a%l2gv1(myidx(nlr+1:mysz),info)
!write(0,*) iam,' Check on extra nodes ',nlr,mysz,':',myidx(nlr+1:mysz)
do ii= nlr+1, mysz, nb
ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
& + c(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=dzero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
end block
end if
call psb_barrier(ctxt)
t1 = psb_wtime()
if (info == psb_success_) then
@ -719,7 +631,7 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if
deallocate(val,irow,icol)
call psb_erractionrestore(err_act)
return
@ -744,6 +656,9 @@ program psb_d_pde3d
use psb_krylov_mod
use psb_util_mod
use psb_d_pde3d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! input parameters
@ -765,7 +680,7 @@ program psb_d_pde3d
type(psb_d_vect_type) :: xxv,bv
! parallel environment
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np
integer(psb_ipk_) :: iam, np, nth
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -790,7 +705,16 @@ program psb_d_pde3d
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
@ -914,6 +838,8 @@ program psb_d_pde3d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter

@ -156,6 +156,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod
use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
!
! Discretizes the partial differential equation
!
@ -192,7 +195,7 @@ contains
type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat) :: acsr
real(psb_spk_) :: zt(nb),x,y,z
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz
integer(psb_lpk_) :: m,n,glob_row,nt
integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
! For 2D partition
@ -204,8 +207,7 @@ contains
! Process grid
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
real(psb_spk_), allocatable :: val(:)
integer(psb_lpk_), allocatable :: myidx(:)
! deltah dimension of each grid cell
! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2
@ -391,7 +393,6 @@ contains
end if
end block
case default
write(psb_err_unit,*) iam, 'Initialization error: should not get here'
info = -1
@ -418,94 +419,114 @@ contains
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt)
t1 = psb_wtime()
do ii=1, nlr,nb
ib = min(nb,nlr-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
zt(k) = f_(x,y)
! internal point: build discretization
!
! term depending on (x-1,y)
!
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
if (ix == 1) then
zt(k) = g(szero,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1)
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
if (iy == 1) then
zt(k) = g(x,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
!$omp parallel shared(deltah,myidx,a,desc_a)
!
block
integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy, ith,nth
integer(psb_lpk_) :: glob_row
integer(psb_lpk_), allocatable :: irow(:),icol(:)
real(psb_spk_), allocatable :: val(:)
real(psb_spk_) :: x,y, zt(nb)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
!goto 9999
endif
!$omp do schedule(dynamic)
!
do ii=1, nlr,nb
if(info /= psb_success_) cycle
ib = min(nb,nlr-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,glob_row,idim,idim)
! x, y coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
zt(k) = f_(x,y)
! internal point: build discretization
!
! term depending on (x-1,y)
!
val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
if (ix == 1) then
zt(k) = g(szero,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1)
val(icoeff) = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
if (iy == 1) then
zt(k) = g(x,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y)
val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
call ijk2idx(icol(icoeff),ix,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1)
val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
if (iy == idim) then
zt(k) = g(x,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y)
val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
if (ix==idim) then
zt(k) = g(sone,y)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1
if(info /= psb_success_) then
info=psb_err_from_subroutine_
@ -514,8 +535,6 @@ contains
goto 9999
end if
deallocate(val,irow,icol)
call psb_barrier(ctxt)
t1 = psb_wtime()
call psb_cdasb(desc_a,info,mold=imold)
@ -579,6 +598,9 @@ program psb_s_pde2d
use psb_krylov_mod
use psb_util_mod
use psb_s_pde2d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! input parameters
@ -600,7 +622,7 @@ program psb_s_pde2d
type(psb_s_vect_type) :: xxv,bv
! parallel environment
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np
integer(psb_ipk_) :: iam, np, nth
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -625,7 +647,16 @@ program psb_s_pde2d
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
@ -750,6 +781,8 @@ program psb_s_pde2d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter
@ -790,7 +823,8 @@ contains
!
! get iteration parameters from standard input
!
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
subroutine get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
& itmax,itrace,irst,ipart,parms)
type(psb_ctxt_type) :: ctxt
character(len=*) :: kmethd, ptype, afmt
integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart

@ -172,6 +172,9 @@ contains
& f,amold,vmold,imold,partition,nrl,iv)
use psb_base_mod
use psb_util_mod
#if defined(OPENMP)
use omp_lib
#endif
!
! Discretizes the partial differential equation
!
@ -220,8 +223,7 @@ contains
! Process grid
integer(psb_ipk_) :: np, iam
integer(psb_ipk_) :: icoeff
integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
real(psb_spk_), allocatable :: val(:)
integer(psb_lpk_), allocatable :: myidx(:)
! deltah dimension of each grid cell
! deltat discretization time
real(psb_spk_) :: deltah, sqdeltah, deltah2
@ -377,7 +379,6 @@ contains
!
call psb_cdall(ctxt,desc_a,info,vl=myidx)
!
! Specify process topology
!
@ -447,69 +448,79 @@ contains
goto 9999
end if
! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate
! a bunch of rows per call.
!
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
endif
! loop over rows belonging to current process in a block
! distribution.
call psb_barrier(ctxt)
t1 = psb_wtime()
do ii=1, nlr, nb
ib = min(nb,nlr-ii+1)
!ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
!$omp parallel shared(deltah,myidx,a,desc_a)
!
block
integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
integer(psb_lpk_) :: glob_row
integer(psb_lpk_), allocatable :: irow(:),icol(:)
real(psb_spk_), allocatable :: val(:)
real(psb_spk_) :: x,y,z, zt(nb)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info)
if (info /= psb_success_ ) then
info=psb_err_alloc_dealloc_
call psb_errpush(info,name)
!goto 9999
endif
!$omp do schedule(dynamic)
!
do ii=1, nlr, nb
if(info /= psb_success_) cycle
ib = min(nb,nlr-ii+1)
!ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
@ -546,14 +557,22 @@ contains
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
#if defined(OPENMP)
!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
#endif
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) cycle
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) cycle
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) cycle
end do
!$omp end do
deallocate(val,irow,icol)
end block
!$omp end parallel
tgen = psb_wtime()-t1
if(info /= psb_success_) then
@ -569,113 +588,6 @@ contains
call psb_cdasb(desc_a,info,mold=imold)
tcdasb = psb_wtime()-t1
if (.false.) then
!
! Add extra rows to test remote build.
!
block
integer(psb_ipk_) :: ks, i
ks = desc_a%get_local_cols()-desc_a%get_local_rows()
if (ks > 0) ks = max(1,ks / 10)
mysz = nlr+ks
call psb_realloc(mysz,myidx,info)
do i=nlr+1, mysz
myidx(i) = i
end do
call desc_a%l2gv1(myidx(nlr+1:mysz),info)
!write(0,*) iam,' Check on extra nodes ',nlr,mysz,':',myidx(nlr+1:mysz)
do ii= nlr+1, mysz, nb
ib = min(nb,mysz-ii+1)
icoeff = 1
do k=1,ib
i=ii+k-1
! local matrix pointer
glob_row=myidx(i)
! compute gridpoint coordinates
call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
! x, y, z coordinates
x = (ix-1)*deltah
y = (iy-1)*deltah
z = (iz-1)*deltah
zt(k) = f_(x,y,z)
! internal point: build discretization
!
! term depending on (x-1,y,z)
!
val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
if (ix == 1) then
zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y-1,z)
val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
if (iy == 1) then
zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z-1)
val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
if (iz == 1) then
zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y,z)
val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
& + c(x,y,z)
call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
! term depending on (x,y,z+1)
val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
if (iz == idim) then
zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x,y+1,z)
val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
if (iy == idim) then
zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
! term depending on (x+1,y,z)
val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
if (ix==idim) then
zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k)
else
call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
irow(icoeff) = glob_row
icoeff = icoeff+1
endif
end do
call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
if(info /= psb_success_) exit
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
if(info /= psb_success_) exit
zt(:)=szero
call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
if(info /= psb_success_) exit
end do
end block
end if
call psb_barrier(ctxt)
t1 = psb_wtime()
if (info == psb_success_) then
@ -719,7 +631,7 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if
deallocate(val,irow,icol)
call psb_erractionrestore(err_act)
return
@ -744,6 +656,9 @@ program psb_s_pde3d
use psb_krylov_mod
use psb_util_mod
use psb_s_pde3d_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
! input parameters
@ -765,7 +680,7 @@ program psb_s_pde3d
type(psb_s_vect_type) :: xxv,bv
! parallel environment
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: iam, np
integer(psb_ipk_) :: iam, np, nth
! solver parameters
integer(psb_ipk_) :: iter, itmax,itrace, istopc, irst, ipart
@ -790,7 +705,16 @@ program psb_s_pde3d
call psb_init(ctxt)
call psb_info(ctxt,iam,np)
#if defined(OPENMP)
!$OMP parallel shared(nth)
!$OMP master
nth = omp_get_num_threads()
!$OMP end master
!$OMP end parallel
#else
nth = 1
#endif
if (iam < 0) then
! This should not happen, but just in case
call psb_exit(ctxt)
@ -914,6 +838,8 @@ program psb_s_pde3d
if (iam == psb_root_) then
write(psb_out_unit,'(" ")')
write(psb_out_unit,'("Number of processes : ",i12)')np
write(psb_out_unit,'("Number of threads : ",i12)')nth
write(psb_out_unit,'("Total number of tasks : ",i12)')nth*np
write(psb_out_unit,'("Linear system size : ",i12)') system_size
write(psb_out_unit,'("Time to solve system : ",es12.5)')t2
write(psb_out_unit,'("Time per iteration : ",es12.5)')t2/iter

@ -2,13 +2,13 @@
BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR
BJAC Preconditioner NONE DIAG BJAC
CSR Storage format for matrix A: CSR COO
040 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) )
140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) )
3 Partition: 1 BLOCK 3 3D
2 Stopping criterion 1 2
0100 MAXIT
0200 MAXIT
05 ITRACE
002 IRST restart for RGMRES and BiCGSTABL
ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH
INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH
NONE If ILU : MILU or NONE othewise ignored
NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored
0 Level of fill for forward factorization

Loading…
Cancel
Save