merged dev-openmp into omp-walther

omp-walther
wlthr 1 year ago
commit 979a3da95f

@ -121,6 +121,8 @@ Salvatore Filippone
Contributors (roughly reverse cronological order): Contributors (roughly reverse cronological order):
Andea Di Iorio
Stefano Petrilli
Soren Rasmussen Soren Rasmussen
Zaak Beekman Zaak Beekman
Ambra Abdullahi Hassan Ambra Abdullahi Hassan

@ -123,7 +123,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -236,9 +236,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_ipk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -123,7 +123,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -236,9 +236,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_ipk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -124,7 +124,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -237,9 +237,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_ipk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -124,7 +124,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_lpk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -237,9 +237,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_lpk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_lpk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -123,7 +123,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -236,9 +236,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_ipk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -123,7 +123,7 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5
@ -236,9 +236,9 @@ contains
return return
endif endif
call psb_ensure_size(heap%last+1,heap%keys,info,addsz=(1_psb_ipk_)*psb_heap_resize) call psb_ensure_size(heap%last+1,heap%keys,info)
if (info == psb_success_) & if (info == psb_success_) &
& call psb_ensure_size(heap%last+1,heap%idxs,info,addsz=(1_psb_ipk_)*psb_heap_resize) & call psb_ensure_size(heap%last+1,heap%idxs,info)
if (info /= psb_success_) then if (info /= psb_success_) then
write(psb_err_unit,*) 'Memory allocation failure in heap_insert' write(psb_err_unit,*) 'Memory allocation failure in heap_insert'
info = -5 info = -5

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
! ...Local Variables ! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err character(len=30) :: name, char_err
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -790,7 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
vout(:) = vin(:) !$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif endif
endif endif
@ -836,7 +840,9 @@ Contains
call psb_errpush(info,name,a_err=char_err) call psb_errpush(info,name,a_err=char_err)
goto 9999 goto 9999
else else
!$omp workshare
vout(:,:) = vin(:,:) vout(:,:) = vin(:,:)
!$omp end workshare
endif endif
endif endif
@ -991,36 +997,17 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then isz = psb_size(v)
!!$ if (present(newsz)) then If (len > isz) Then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP) #if defined(OPENMP)
!$OMP CRITICAL !$OMP CRITICAL
if (len > psb_size(v)) then if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else if (present(addsz)) then
isz = max(len,1,isz+addsz)
else else
if (present(addsz)) then isz = max(len,1,int(1.25*isz))
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
@ -1033,17 +1020,18 @@ Contains
goto 9999 goto 9999
end if end if
#else #else
if (len > isz) then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
@ -1085,16 +1073,14 @@ Contains
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
goto 9999 goto 9999
end if end if
isz = psb_size(v)
If (len > psb_size(v)) Then If (len > isz) Then
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = max(len+1,1,newsz)
else else if (present(addsz)) then
if (present(addsz)) then isz = max(len,1,isz+addsz)
isz = len+max(1,addsz)
else else
isz = max(len+10, int(1.25*len)) isz = max(len,1,int(1.25*isz))
endif
endif endif
call psb_realloc(isz,v,info,pad=pad) call psb_realloc(isz,v,info,pad=pad)

@ -652,7 +652,7 @@ contains
integer(psb_ipk_) :: me, np integer(psb_ipk_) :: me, np
character(len=20) :: name,ch_err character(len=20) :: name,ch_err
logical, allocatable :: mask_(:) logical, allocatable :: mask_(:)
logical :: use_openmp = .true. !!$ logical :: use_openmp = .true.
#ifdef OPENMP #ifdef OPENMP
integer(kind = OMP_lock_kind) :: ins_lck integer(kind = OMP_lock_kind) :: ins_lck
#endif #endif
@ -683,7 +683,6 @@ contains
mglob = idxmap%get_gr() mglob = idxmap%get_gr()
nrow = idxmap%get_lr() nrow = idxmap%get_lr()
!write(0,*) me,name,' before loop ',psb_errstatus_fatal() !write(0,*) me,name,' before loop ',psb_errstatus_fatal()
if (use_openmp) then
#ifdef OPENMP #ifdef OPENMP
!call OMP_init_lock(ins_lck) !call OMP_init_lock(ins_lck)
@ -1049,13 +1048,8 @@ contains
info = -1 info = -1
end if end if
!call OMP_destroy_lock(ins_lck) !call OMP_destroy_lock(ins_lck)
#endif #else
else if (.not.use_openmp) then !!$ else if (.not.use_openmp) then
#ifdef OPENMP
! $ omp parallel
! $ omp critical
!write(0,*) 'In cnv: ',omp_get_num_threads()
#endif
isLoopValid = .true. isLoopValid = .true.
if (idxmap%is_bld()) then if (idxmap%is_bld()) then
@ -1253,13 +1247,8 @@ contains
idx = -1 idx = -1
info = -1 info = -1
end if end if
#ifdef OPENMP
! $ omp end critical
! $ omp end parallel
#endif
if (.not. isLoopValid) goto 9999 if (.not. isLoopValid) goto 9999
end if #endif
!write(0,*) me,name,' after loop ',psb_errstatus_fatal() !write(0,*) me,name,' after loop ',psb_errstatus_fatal()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

@ -168,6 +168,8 @@ module psb_c_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_c_coo_reallocate_nz procedure, pass(a) :: reallocate_nz => psb_c_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_c_coo_allocate_mnnz procedure, pass(a) :: allocate_mnnz => psb_c_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_c_coo_ensure_size procedure, pass(a) :: ensure_size => psb_c_coo_ensure_size
procedure, pass(a) :: tril => psb_c_coo_tril
procedure, pass(a) :: triu => psb_c_coo_triu
procedure, pass(a) :: cp_to_coo => psb_c_cp_coo_to_coo procedure, pass(a) :: cp_to_coo => psb_c_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_c_cp_coo_from_coo procedure, pass(a) :: cp_from_coo => psb_c_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_c_cp_coo_to_fmt procedure, pass(a) :: cp_to_fmt => psb_c_cp_coo_to_fmt
@ -1894,6 +1896,93 @@ module psb_c_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_c_fix_coo end subroutine psb_c_fix_coo
end interface end interface
!
!> Function tril:
!! \memberof psb_c_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_c_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_c_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_c_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_c_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_c_coo_triu
end interface
!> !>
!! \memberof psb_c_coo_sparse_mat !! \memberof psb_c_coo_sparse_mat

@ -481,7 +481,11 @@ contains
implicit none implicit none
class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=czero if (allocated(x%v)) then
!$omp workshare
x%v(:)=czero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine c_base_zero end subroutine c_base_zero

@ -168,6 +168,8 @@ module psb_d_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_d_coo_reallocate_nz procedure, pass(a) :: reallocate_nz => psb_d_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_d_coo_allocate_mnnz procedure, pass(a) :: allocate_mnnz => psb_d_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_d_coo_ensure_size procedure, pass(a) :: ensure_size => psb_d_coo_ensure_size
procedure, pass(a) :: tril => psb_d_coo_tril
procedure, pass(a) :: triu => psb_d_coo_triu
procedure, pass(a) :: cp_to_coo => psb_d_cp_coo_to_coo procedure, pass(a) :: cp_to_coo => psb_d_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_d_cp_coo_from_coo procedure, pass(a) :: cp_from_coo => psb_d_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_d_cp_coo_to_fmt procedure, pass(a) :: cp_to_fmt => psb_d_cp_coo_to_fmt
@ -1894,6 +1896,93 @@ module psb_d_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_d_fix_coo end subroutine psb_d_fix_coo
end interface end interface
!
!> Function tril:
!! \memberof psb_d_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_d_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_d_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_d_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_d_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_d_coo_triu
end interface
!> !>
!! \memberof psb_d_coo_sparse_mat !! \memberof psb_d_coo_sparse_mat

@ -488,7 +488,11 @@ contains
implicit none implicit none
class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=dzero if (allocated(x%v)) then
!$omp workshare
x%v(:)=dzero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine d_base_zero end subroutine d_base_zero

@ -417,7 +417,11 @@ contains
implicit none implicit none
class(psb_i_base_vect_type), intent(inout) :: x class(psb_i_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=izero if (allocated(x%v)) then
!$omp workshare
x%v(:)=izero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine i_base_zero end subroutine i_base_zero

@ -418,7 +418,11 @@ contains
implicit none implicit none
class(psb_l_base_vect_type), intent(inout) :: x class(psb_l_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=lzero if (allocated(x%v)) then
!$omp workshare
x%v(:)=lzero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine l_base_zero end subroutine l_base_zero

@ -168,6 +168,8 @@ module psb_s_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_s_coo_reallocate_nz procedure, pass(a) :: reallocate_nz => psb_s_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_s_coo_allocate_mnnz procedure, pass(a) :: allocate_mnnz => psb_s_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_s_coo_ensure_size procedure, pass(a) :: ensure_size => psb_s_coo_ensure_size
procedure, pass(a) :: tril => psb_s_coo_tril
procedure, pass(a) :: triu => psb_s_coo_triu
procedure, pass(a) :: cp_to_coo => psb_s_cp_coo_to_coo procedure, pass(a) :: cp_to_coo => psb_s_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_s_cp_coo_from_coo procedure, pass(a) :: cp_from_coo => psb_s_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_s_cp_coo_to_fmt procedure, pass(a) :: cp_to_fmt => psb_s_cp_coo_to_fmt
@ -1894,6 +1896,93 @@ module psb_s_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_s_fix_coo end subroutine psb_s_fix_coo
end interface end interface
!
!> Function tril:
!! \memberof psb_s_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_s_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_s_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_s_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_s_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_s_coo_triu
end interface
!> !>
!! \memberof psb_s_coo_sparse_mat !! \memberof psb_s_coo_sparse_mat

@ -488,7 +488,11 @@ contains
implicit none implicit none
class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=szero if (allocated(x%v)) then
!$omp workshare
x%v(:)=szero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine s_base_zero end subroutine s_base_zero

@ -168,6 +168,8 @@ module psb_z_base_mat_mod
procedure, pass(a) :: reallocate_nz => psb_z_coo_reallocate_nz procedure, pass(a) :: reallocate_nz => psb_z_coo_reallocate_nz
procedure, pass(a) :: allocate_mnnz => psb_z_coo_allocate_mnnz procedure, pass(a) :: allocate_mnnz => psb_z_coo_allocate_mnnz
procedure, pass(a) :: ensure_size => psb_z_coo_ensure_size procedure, pass(a) :: ensure_size => psb_z_coo_ensure_size
procedure, pass(a) :: tril => psb_z_coo_tril
procedure, pass(a) :: triu => psb_z_coo_triu
procedure, pass(a) :: cp_to_coo => psb_z_cp_coo_to_coo procedure, pass(a) :: cp_to_coo => psb_z_cp_coo_to_coo
procedure, pass(a) :: cp_from_coo => psb_z_cp_coo_from_coo procedure, pass(a) :: cp_from_coo => psb_z_cp_coo_from_coo
procedure, pass(a) :: cp_to_fmt => psb_z_cp_coo_to_fmt procedure, pass(a) :: cp_to_fmt => psb_z_cp_coo_to_fmt
@ -1894,6 +1896,93 @@ module psb_z_base_mat_mod
integer(psb_ipk_), intent(in), optional :: idir integer(psb_ipk_), intent(in), optional :: idir
end subroutine psb_z_fix_coo end subroutine psb_z_fix_coo
end interface end interface
!
!> Function tril:
!! \memberof psb_z_coo_sparse_mat
!! \brief Copy the lower triangle, i.e. all entries
!! A(I,J) such that J-I <= DIAG
!! default value is DIAG=0, i.e. lower triangle up to
!! the main diagonal.
!! DIAG=-1 means copy the strictly lower triangle
!! DIAG= 1 means copy the lower triangle plus the first diagonal
!! of the upper triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!!
!! \param l the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param u [none] copy of the complementary triangle
!!
!
interface
subroutine psb_z_coo_tril(a,l,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,u)
import
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: u
end subroutine psb_z_coo_tril
end interface
!
!> Function triu:
!! \memberof psb_z_coo_sparse_mat
!! \brief Copy the upper triangle, i.e. all entries
!! A(I,J) such that DIAG <= J-I
!! default value is DIAG=0, i.e. upper triangle from
!! the main diagonal up.
!! DIAG= 1 means copy the strictly upper triangle
!! DIAG=-1 means copy the upper triangle plus the first diagonal
!! of the lower triangle.
!! Moreover, apply a clipping by copying entries A(I,J) only if
!! IMIN<=I<=IMAX
!! JMIN<=J<=JMAX
!! Optionally copies the lower triangle at the same time
!!
!! \param u the output (sub)matrix
!! \param info return code
!! \param diag [0] the last diagonal (J-I) to be considered.
!! \param imin [1] the minimum row index we are interested in
!! \param imax [a\%get_nrows()] the minimum row index we are interested in
!! \param jmin [1] minimum col index
!! \param jmax [a\%get_ncols()] maximum col index
!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:))
!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1]
!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1]
!! ( iren cannot be specified with rscale/cscale)
!! \param append [false] append to ia,ja
!! \param nzin [none] if append, then first new entry should go in entry nzin+1
!! \param l [none] copy of the complementary triangle
!!
!
interface
subroutine psb_z_coo_triu(a,u,info,diag,imin,imax,&
& jmin,jmax,rscale,cscale,l)
import
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: l
end subroutine psb_z_coo_triu
end interface
!> !>
!! \memberof psb_z_coo_sparse_mat !! \memberof psb_z_coo_sparse_mat

@ -481,7 +481,11 @@ contains
implicit none implicit none
class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_base_vect_type), intent(inout) :: x
if (allocated(x%v)) x%v=zzero if (allocated(x%v)) then
!$omp workshare
x%v(:)=zzero
!$omp end workshare
end if
call x%set_host() call x%set_host()
end subroutine z_base_zero end subroutine z_base_zero

@ -2864,7 +2864,9 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
if (a%is_bld()) then if (a%is_bld()) then
! Structure here is peculiar, because this function can be called
! either within a parallel region, or outside.
! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
isza = a%get_size() isza = a%get_size()
@ -2879,9 +2881,6 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
nzaold = nza nzaold = nza
nza = nza + nz nza = nza + nz
call a%set_nzeros(nza) call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
@ -2960,8 +2959,7 @@ contains
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz
!nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)
@ -3489,6 +3487,602 @@ subroutine psb_c_coo_mv_from(a,b)
end subroutine psb_c_coo_mv_from end subroutine psb_c_coo_mv_from
!
! COO implementation of tril/triu
!
subroutine psb_c_coo_tril(a,l,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,u)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_tril
implicit none
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: u
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='tril'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call l%set_nzeros(nzlin)
call u%set_nzeros(nzuin)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
nzin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzin = nzin + 1
l%ia(nzin) = i
l%ja(nzin) = ja(k)
l%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call l%set_nzeros(nzin)
end if
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_coo_tril
subroutine psb_c_coo_triu(a,u,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,l)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_triu
implicit none
class(psb_c_coo_sparse_mat), intent(in) :: a
class(psb_c_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_c_coo_sparse_mat), optional, intent(out) :: l
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='triu'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call u%set_nzeros(nzuin)
call l%set_nzeros(nzlin)
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <=1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
else
nzin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
nzin = nzin + 1
u%ia(nzin) = i
u%ja(nzin) = ja(k)
u%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call u%set_nzeros(nzin)
end if
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_coo_triu
subroutine psb_c_fix_coo(a,info,idir) subroutine psb_c_fix_coo(a,info,idir)
use psb_const_mod use psb_const_mod
@ -4563,7 +5157,7 @@ function psb_lc_coo_maxval(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, nnz do i=1, nnz
res = max(res,abs(a%val(i))) res = max(res,abs(a%val(i)))
end do end do
@ -4630,7 +5224,7 @@ function psb_lc_coo_csnmi(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, m do i=1, m
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do
@ -4680,7 +5274,7 @@ function psb_lc_coo_csnm1(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, n do i=1, n
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do

@ -2329,9 +2329,28 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -2443,9 +2462,27 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2289,7 +2289,155 @@ subroutine psb_c_csr_tril(a,l,info,&
nb = jmax_ nb = jmax_
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call l%allocate(mb,nb,nz) call l%allocate(mb,nb,nz)
@ -2359,7 +2507,7 @@ subroutine psb_c_csr_tril(a,l,info,&
call l%set_triangle(.true.) call l%set_triangle(.true.)
call l%set_lower(.true.) call l%set_lower(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -2443,6 +2591,158 @@ subroutine psb_c_csr_triu(a,u,info,&
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call u%allocate(mb,nb,nz) call u%allocate(mb,nb,nz)
@ -2511,7 +2811,7 @@ subroutine psb_c_csr_triu(a,u,info,&
call u%set_triangle(.true.) call u%set_triangle(.true.)
call u%set_upper(.true.) call u%set_upper(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -3190,9 +3490,28 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -3276,9 +3595,27 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2864,7 +2864,9 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
if (a%is_bld()) then if (a%is_bld()) then
! Structure here is peculiar, because this function can be called
! either within a parallel region, or outside.
! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
isza = a%get_size() isza = a%get_size()
@ -2879,9 +2881,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
nzaold = nza nzaold = nza
nza = nza + nz nza = nza + nz
call a%set_nzeros(nza) call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
@ -2960,8 +2959,7 @@ contains
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz
!nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)
@ -3489,6 +3487,602 @@ subroutine psb_d_coo_mv_from(a,b)
end subroutine psb_d_coo_mv_from end subroutine psb_d_coo_mv_from
!
! COO implementation of tril/triu
!
subroutine psb_d_coo_tril(a,l,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,u)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_d_base_mat_mod, psb_protect_name => psb_d_coo_tril
implicit none
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: u
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='tril'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call l%set_nzeros(nzlin)
call u%set_nzeros(nzuin)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
nzin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzin = nzin + 1
l%ia(nzin) = i
l%ja(nzin) = ja(k)
l%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call l%set_nzeros(nzin)
end if
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_coo_tril
subroutine psb_d_coo_triu(a,u,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,l)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_d_base_mat_mod, psb_protect_name => psb_d_coo_triu
implicit none
class(psb_d_coo_sparse_mat), intent(in) :: a
class(psb_d_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_d_coo_sparse_mat), optional, intent(out) :: l
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='triu'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call u%set_nzeros(nzuin)
call l%set_nzeros(nzlin)
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <=1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
else
nzin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
nzin = nzin + 1
u%ia(nzin) = i
u%ja(nzin) = ja(k)
u%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call u%set_nzeros(nzin)
end if
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_coo_triu
subroutine psb_d_fix_coo(a,info,idir) subroutine psb_d_fix_coo(a,info,idir)
use psb_const_mod use psb_const_mod
@ -4563,7 +5157,7 @@ function psb_ld_coo_maxval(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, nnz do i=1, nnz
res = max(res,abs(a%val(i))) res = max(res,abs(a%val(i)))
end do end do
@ -4630,7 +5224,7 @@ function psb_ld_coo_csnmi(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, m do i=1, m
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do
@ -4680,7 +5274,7 @@ function psb_ld_coo_csnm1(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, n do i=1, n
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do

@ -2329,9 +2329,28 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -2443,9 +2462,27 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2289,7 +2289,155 @@ subroutine psb_d_csr_tril(a,l,info,&
nb = jmax_ nb = jmax_
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call l%allocate(mb,nb,nz) call l%allocate(mb,nb,nz)
@ -2359,7 +2507,7 @@ subroutine psb_d_csr_tril(a,l,info,&
call l%set_triangle(.true.) call l%set_triangle(.true.)
call l%set_lower(.true.) call l%set_lower(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -2443,6 +2591,158 @@ subroutine psb_d_csr_triu(a,u,info,&
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call u%allocate(mb,nb,nz) call u%allocate(mb,nb,nz)
@ -2511,7 +2811,7 @@ subroutine psb_d_csr_triu(a,u,info,&
call u%set_triangle(.true.) call u%set_triangle(.true.)
call u%set_upper(.true.) call u%set_upper(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -3190,9 +3490,28 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -3276,9 +3595,27 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2864,7 +2864,9 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
if (a%is_bld()) then if (a%is_bld()) then
! Structure here is peculiar, because this function can be called
! either within a parallel region, or outside.
! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
isza = a%get_size() isza = a%get_size()
@ -2879,9 +2881,6 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
nzaold = nza nzaold = nza
nza = nza + nz nza = nza + nz
call a%set_nzeros(nza) call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
@ -2960,8 +2959,7 @@ contains
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz
!nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)
@ -3489,6 +3487,602 @@ subroutine psb_s_coo_mv_from(a,b)
end subroutine psb_s_coo_mv_from end subroutine psb_s_coo_mv_from
!
! COO implementation of tril/triu
!
subroutine psb_s_coo_tril(a,l,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,u)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_tril
implicit none
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: u
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='tril'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call l%set_nzeros(nzlin)
call u%set_nzeros(nzuin)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
nzin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzin = nzin + 1
l%ia(nzin) = i
l%ja(nzin) = ja(k)
l%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call l%set_nzeros(nzin)
end if
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_coo_tril
subroutine psb_s_coo_triu(a,u,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,l)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_triu
implicit none
class(psb_s_coo_sparse_mat), intent(in) :: a
class(psb_s_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_s_coo_sparse_mat), optional, intent(out) :: l
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='triu'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call u%set_nzeros(nzuin)
call l%set_nzeros(nzlin)
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <=1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
else
nzin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
nzin = nzin + 1
u%ia(nzin) = i
u%ja(nzin) = ja(k)
u%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call u%set_nzeros(nzin)
end if
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_coo_triu
subroutine psb_s_fix_coo(a,info,idir) subroutine psb_s_fix_coo(a,info,idir)
use psb_const_mod use psb_const_mod
@ -4563,7 +5157,7 @@ function psb_ls_coo_maxval(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, nnz do i=1, nnz
res = max(res,abs(a%val(i))) res = max(res,abs(a%val(i)))
end do end do
@ -4630,7 +5224,7 @@ function psb_ls_coo_csnmi(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, m do i=1, m
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do
@ -4680,7 +5274,7 @@ function psb_ls_coo_csnm1(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, n do i=1, n
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do

@ -2329,9 +2329,28 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -2443,9 +2462,27 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2289,7 +2289,155 @@ subroutine psb_s_csr_tril(a,l,info,&
nb = jmax_ nb = jmax_
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call l%allocate(mb,nb,nz) call l%allocate(mb,nb,nz)
@ -2359,7 +2507,7 @@ subroutine psb_s_csr_tril(a,l,info,&
call l%set_triangle(.true.) call l%set_triangle(.true.)
call l%set_lower(.true.) call l%set_lower(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -2443,6 +2591,158 @@ subroutine psb_s_csr_triu(a,u,info,&
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call u%allocate(mb,nb,nz) call u%allocate(mb,nb,nz)
@ -2511,7 +2811,7 @@ subroutine psb_s_csr_triu(a,u,info,&
call u%set_triangle(.true.) call u%set_triangle(.true.)
call u%set_upper(.true.) call u%set_upper(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -3190,9 +3490,28 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -3276,9 +3595,27 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2864,7 +2864,9 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
if (a%is_bld()) then if (a%is_bld()) then
! Structure here is peculiar, because this function can be called
! either within a parallel region, or outside.
! Hence the call to set_nzeros done here.
!$omp critical !$omp critical
nza = a%get_nzeros() nza = a%get_nzeros()
isza = a%get_size() isza = a%get_size()
@ -2879,9 +2881,6 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
nzaold = nza nzaold = nza
nza = nza + nz nza = nza + nz
call a%set_nzeros(nza) call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical !$omp end critical
if (info /= 0) goto 9999 if (info /= 0) goto 9999
@ -2960,8 +2959,7 @@ contains
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz
!nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)
@ -3489,6 +3487,602 @@ subroutine psb_z_coo_mv_from(a,b)
end subroutine psb_z_coo_mv_from end subroutine psb_z_coo_mv_from
!
! COO implementation of tril/triu
!
subroutine psb_z_coo_tril(a,l,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,u)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_tril
implicit none
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: l
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: u
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='tril'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call l%set_nzeros(nzlin)
call u%set_nzeros(nzuin)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
nzin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
nzin = nzin + 1
l%ia(nzin) = i
l%ja(nzin) = ja(k)
l%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call l%set_nzeros(nzin)
end if
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_coo_tril
subroutine psb_z_coo_triu(a,u,info,&
& diag,imin,imax,jmin,jmax,rscale,cscale,l)
! Output is always in COO format
use psb_error_mod
use psb_const_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_triu
implicit none
class(psb_z_coo_sparse_mat), intent(in) :: a
class(psb_z_coo_sparse_mat), intent(out) :: u
integer(psb_ipk_),intent(out) :: info
integer(psb_ipk_), intent(in), optional :: diag,imin,imax,jmin,jmax
logical, intent(in), optional :: rscale,cscale
class(psb_z_coo_sparse_mat), optional, intent(out) :: l
integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
character(len=20) :: name='triu'
logical :: rscale_, cscale_
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (present(diag)) then
diag_ = diag
else
diag_ = 0
end if
if (present(imin)) then
imin_ = imin
else
imin_ = 1
end if
if (present(imax)) then
imax_ = imax
else
imax_ = a%get_nrows()
end if
if (present(jmin)) then
jmin_ = jmin
else
jmin_ = 1
end if
if (present(jmax)) then
jmax_ = jmax
else
jmax_ = a%get_ncols()
end if
if (present(rscale)) then
rscale_ = rscale
else
rscale_ = .true.
end if
if (present(cscale)) then
cscale_ = cscale
else
cscale_ = .true.
end if
if (rscale_) then
mb = imax_ - imin_ +1
else
mb = imax_
endif
if (cscale_) then
nb = jmax_ - jmin_ +1
else
nb = jmax_
endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = a%ia(k)
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do k=1,nz
i = a%ia(k)
j = a%ja(k)
if ((i>=imin_).and.(i<=imax_).and.(jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = a%ia(k)
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop1: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
nzlin = nzlin + 1
l%ia(nzlin) = i
l%ja(nzlin) = ja(k)
l%val(nzlin) = val(k)
else
nzuin = nzuin + 1
u%ia(nzuin) = i
u%ja(nzuin) = ja(k)
u%val(nzuin) = val(k)
end if
end if
end do loop1
end associate
call u%set_nzeros(nzuin)
call l%set_nzeros(nzlin)
call l%fix(info)
nzout = l%get_nzeros()
if (rscale_) &
& l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
if (cscale_) &
& l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
if ((diag_ <=1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
else
nzin = u%get_nzeros() ! At this point it should be 0
associate(val =>a%val, ja => a%ja, ia=>a%ia)
loop2: do k=1,nz
i = ia(k)
j = ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
nzin = nzin + 1
u%ia(nzin) = i
u%ja(nzin) = ja(k)
u%val(nzin) = val(k)
end if
end if
end do loop2
end associate
call u%set_nzeros(nzin)
end if
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) &
& u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
if (cscale_) &
& u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
#endif
if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_coo_triu
subroutine psb_z_fix_coo(a,info,idir) subroutine psb_z_fix_coo(a,info,idir)
use psb_const_mod use psb_const_mod
@ -4563,7 +5157,7 @@ function psb_lz_coo_maxval(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, nnz do i=1, nnz
res = max(res,abs(a%val(i))) res = max(res,abs(a%val(i)))
end do end do
@ -4630,7 +5224,7 @@ function psb_lz_coo_csnmi(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, m do i=1, m
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do
@ -4680,7 +5274,7 @@ function psb_lz_coo_csnm1(a) result(res)
#if defined(OPENMP) #if defined(OPENMP)
block block
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i) !$omp parallel do private(i) reduction(max:res)
do i=1, n do i=1, n
res = max(res,abs(vt(i))) res = max(res,abs(vt(i)))
end do end do

@ -2329,9 +2329,28 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -2443,9 +2462,27 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

@ -2289,7 +2289,155 @@ subroutine psb_z_csr_tril(a,l,info,&
nb = jmax_ nb = jmax_
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,lrws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call l%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(u)) then
nzlin = l%get_nzeros() ! At this point it should be 0
call u%allocate(mb,nb,nz)
nzuin = u%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >=-1).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_lower(.false.)
end if
else
lnz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,lrws,info)
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<=diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call l%set_nzeros(lnz)
call l%fix(info)
end if
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <= 0).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call l%allocate(mb,nb,nz) call l%allocate(mb,nb,nz)
@ -2359,7 +2507,7 @@ subroutine psb_z_csr_tril(a,l,info,&
call l%set_triangle(.true.) call l%set_triangle(.true.)
call l%set_lower(.true.) call l%set_lower(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -2443,6 +2591,158 @@ subroutine psb_z_csr_triu(a,u,info,&
endif endif
#if defined(OPENMP)
block
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
call psb_realloc(mb,urws,info)
!$omp workshare
urws(:) = 0
!$omp end workshare
nz = a%get_nzeros()
call u%allocate(mb,nb,nz)
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
if (present(l)) then
nzuin = u%get_nzeros() ! At this point it should be 0
call l%allocate(mb,nb,nz)
nzlin = l%get_nzeros() ! At this point it should be 0
if (info == 0) call psb_realloc(mb,urws,info)
!$omp workshare
lrws(:) = 0
!$omp end workshare
!write(0,*) 'omp version of COO%TRIL/TRIU'
lnz = 0
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
loop1: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic update
lrws(i-imin_+1) = lrws(i-imin_+1) +1
!$omp end atomic
lnz = lnz + 1
else
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop1
!$omp end parallel do
call psi_exscan(mb,lrws,info)
call psi_exscan(mb,urws,info)
!write(0,*) lrws(:), urws(:)
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
loop2: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)<diag_) then
!$omp atomic capture
lrws(i-imin_+1) = lrws(i-imin_+1) +1
lpnt = lrws(i-imin_+1)
!$omp end atomic
l%ia(lpnt) = i
l%ja(lpnt) = a%ja(k)
l%val(lpnt) = a%val(k)
else
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop2
!$omp end parallel do
!write(0,*) 'End of copyout',lnz,unz
call l%set_nzeros(lnz)
call l%fix(info)
call u%set_nzeros(unz)
call u%fix(info)
nzout = l%get_nzeros()
if (rscale_) then
!$omp workshare
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ <=-1).and.(imin_ == jmin_)) then
call l%set_triangle(.true.)
call l%set_lower(.false.)
end if
else
unz = 0
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
loop3: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic update
urws(i-imin_+1) = urws(i-imin_+1) +1
!$omp end atomic
unz = unz + 1
end if
end if
end do
end do loop3
!$omp end parallel do
call psi_exscan(mb,urws,info)
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
loop4: do i=imin_,imax_
do k = a%irp(i),a%irp(i+1)-1
j = a%ja(k)
if ((jmin_<=j).and.(j<=jmax_)) then
if ((j-i)>=diag_) then
!$omp atomic capture
urws(i-imin_+1) = urws(i-imin_+1) +1
upnt = urws(i-imin_+1)
!$omp end atomic
u%ia(upnt) = i
u%ja(upnt) = a%ja(k)
u%val(upnt) = a%val(k)
end if
end if
end do
end do loop4
!$omp end parallel do
call u%set_nzeros(unz)
call u%fix(info)
end if
nzout = u%get_nzeros()
if (rscale_) then
!$omp workshare
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
!$omp end workshare
end if
if (cscale_) then
!$omp workshare
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
!$omp end workshare
end if
if ((diag_ >= 0).and.(imin_ == jmin_)) then
call u%set_triangle(.true.)
call u%set_upper(.true.)
end if
end block
#else
nz = a%get_nzeros() nz = a%get_nzeros()
call u%allocate(mb,nb,nz) call u%allocate(mb,nb,nz)
@ -2511,7 +2811,7 @@ subroutine psb_z_csr_triu(a,u,info,&
call u%set_triangle(.true.) call u%set_triangle(.true.)
call u%set_upper(.true.) call u%set_upper(.true.)
end if end if
#endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -3190,9 +3490,28 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info) if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host() call b%set_host()
class default class default
@ -3276,9 +3595,27 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info) if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! Despite the implementation in safe_cpy, it seems better this way
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host() call a%set_host()
class default class default

Loading…
Cancel
Save