Merge branch 'dev-openmp' into development

master
Salvatore Filippone 2 years ago
commit 61d054850d

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
complex(psb_spk_),allocatable :: tmp(:) complex(psb_spk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
complex(psb_spk_),allocatable :: tmp(:,:) complex(psb_spk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_c_rk2' name='psb_r_m_c_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
real(psb_dpk_),allocatable :: tmp(:) real(psb_dpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
real(psb_dpk_),allocatable :: tmp(:,:) real(psb_dpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_d_rk2' name='psb_r_m_d_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
integer(psb_epk_),allocatable :: tmp(:) integer(psb_epk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
integer(psb_epk_),allocatable :: tmp(:,:) integer(psb_epk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_e_rk2' name='psb_r_m_e_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
integer(psb_i2pk_),allocatable :: tmp(:) integer(psb_i2pk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
integer(psb_i2pk_),allocatable :: tmp(:,:) integer(psb_i2pk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_i2_rk2' name='psb_r_m_i2_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
integer(psb_mpk_),allocatable :: tmp(:) integer(psb_mpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
integer(psb_mpk_),allocatable :: tmp(:,:) integer(psb_mpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_m_rk2' name='psb_r_m_m_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
real(psb_spk_),allocatable :: tmp(:) real(psb_spk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
real(psb_spk_),allocatable :: tmp(:,:) real(psb_spk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_s_rk2' name='psb_r_m_s_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables ! ...Local Variables
complex(psb_dpk_),allocatable :: tmp(:) complex(psb_dpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_ integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
character(len=30) :: name character(len=30) :: name
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad !$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -204,7 +207,7 @@ Contains
complex(psb_dpk_),allocatable :: tmp(:,:) complex(psb_dpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2 integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name character(len=30) :: name
name='psb_r_m_z_rk2' name='psb_r_m_z_rk2'
@ -267,8 +270,14 @@ Contains
end if end if
endif endif
if (present(pad)) then if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad !$omp parallel do private(i) shared(lb1_,dim,len1)
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif endif
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -982,7 +991,48 @@ Contains
goto 9999 goto 9999
end if end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then if (present(newsz)) then
isz = (max(len+1,newsz)) isz = (max(len+1,newsz))
else else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc') call psb_errpush(info,name,a_err='psb_realloc')
goto 9999 goto 9999
End If End If
#endif
end If end If
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -629,13 +629,17 @@ contains
end subroutine hash_g2ls2_ins end subroutine hash_g2ls2_ins
! #################### THESIS ####################
subroutine hash_g2lv1_ins(idx,idxmap,info,mask,lidx) subroutine hash_g2lv1_ins(idx,idxmap,info,mask,lidx)
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
use psb_sort_mod use psb_sort_mod
use psb_penv_mod use psb_penv_mod
!$ use omp_lib
implicit none implicit none
class(psb_hash_map), intent(inout) :: idxmap class(psb_hash_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(inout) :: idx(:) integer(psb_lpk_), intent(inout) :: idx(:)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
@ -648,6 +652,12 @@ contains
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np integer(psb_ipk_) :: me, np
character(len=20) :: name,ch_err character(len=20) :: name,ch_err
logical :: use_openmp = .false.
logical, volatile :: isLoopValid
!$ integer(kind = OMP_lock_kind) :: ins_lck
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
name = 'hash_g2l_ins' name = 'hash_g2l_ins'
@ -664,6 +674,7 @@ contains
return return
end if end if
end if end if
if (present(lidx)) then if (present(lidx)) then
if (size(lidx) < size(idx)) then if (size(lidx) < size(idx)) then
info = -1 info = -1
@ -671,37 +682,277 @@ contains
end if end if
end if end if
mglob = idxmap%get_gr() mglob = idxmap%get_gr()
nrow = idxmap%get_lr() nrow = idxmap%get_lr()
if (idxmap%is_bld()) then if (idxmap%is_bld()) then
if (use_openmp) then
!$ call OMP_init_lock(ins_lck)
isLoopValid = .true.
ncol = idxmap%get_lc()
end if
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
do i = 1, is if (use_openmp) then
ncol = idxmap%get_lc() !$OMP PARALLEL DO default(none) schedule(STATIC) &
if (mask(i)) then !$OMP shared(name,is,mask,lidx,idx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap read/write.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
!$ call OMP_unset_lock(ins_lck)
end if
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if
else
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == tlip) then
ncol = max(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
end if
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
enddo
end if
else if (.not.present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,lidx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
! In OMP logic the index research limit is turned off. It is
! a correct way to fit the subroutine?
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! Locking system to handle concurrent write/access. Under checking!
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
!$ call OMP_unset_lock(ins_lck)
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
! Under checking!
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
!$ call OMP_unset_lock(ins_lck)
end if
end if
idx(i) = lip
info = psb_success_
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if
else
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then if (lip < 0) then
tlip = lip
nxt = lidx(i) nxt = lidx(i)
if (nxt <= nrow) then if (nxt <= nrow) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then if (info >=0) then
if (nxt == tlip) then if (nxt == lip) then
ncol = max(ncol,nxt) ncol = max(nxt,ncol)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& call psb_ensure_size(ncol,idxmap%loc_to_glob,info,pad=-1_psb_lpk_,addsz=laddsz)
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/)) &a_err='psb_ensure_size',i_err=(/info/))
goto 9999 goto 9999
@ -718,64 +969,238 @@ contains
end if end if
idx(i) = lip idx(i) = lip
info = psb_success_ info = psb_success_
else enddo
idx(i) = -1 end if
end if
else if (.not.present(lidx)) then
if (present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,mask,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap write/access.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
else
!$ call OMP_unset_lock(ins_lck)
end if
idx(i) = lip
info = psb_success_
end if
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
idx(i) = -1
end if
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if end if
enddo
else
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
end if
if (info >=0) then
if (nxt == lip) then
ncol = nxt
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
enddo
end if
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
do i = 1, is ip = idx(i)
ncol = idxmap%get_lc()
ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then ! At first, we check the index presence in 'idxmap'. Usually
if (nxt == lip) then ! the index is found. If it is not found, we repeat the checking,
ncol = max(nxt,ncol) ! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap write/access.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
else
!$ call OMP_unset_lock(ins_lck)
end if
idx(i) = lip
info = psb_success_
end if
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/)) &a_err='psb_ensure_size',i_err=(/info/))
goto 9999
!$ isLoopValid = .false.
cycle
end if end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol) idxmap%loc_to_glob(nxt) = ip
endif else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_ info = psb_success_
else else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/)) & a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
isLoopValid = .false.
cycle
end if end if
end if end do
idx(i) = lip !$OMP END PARALLEL DO
info = psb_success_
enddo
end if call idxmap%set_lc(ncol)
else if (.not.present(lidx)) then if (.not. isLoopValid) then
goto 9999
end if
if (present(mask)) then else
do i = 1, is do i = 1, is
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
if (mask(i)) then
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
@ -796,8 +1221,9 @@ contains
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
info=1 info=1
ch_err='psb_ensure_size'
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/)) &a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999 goto 9999
end if end if
idxmap%loc_to_glob(nxt) = ip idxmap%loc_to_glob(nxt) = ip
@ -805,68 +1231,28 @@ contains
endif endif
info = psb_success_ info = psb_success_
else else
ch_err='SearchInsKeyVal'
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/)) & a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999 goto 9999
end if end if
idx(i) = lip idx(i) = lip
info = psb_success_ info = psb_success_
else enddo
idx(i) = -1 end if
end if
enddo
else if (.not.present(mask)) then
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
end if
if (info >=0) then
if (nxt == lip) then
ncol = nxt
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
ch_err='psb_ensure_size'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
ch_err='SearchInsKeyVal'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idx(i) = lip
info = psb_success_
enddo
end if end if
end if end if
if (use_openmp) then
!$ call OMP_destroy_lock(ins_lck)
end if
else else
! Wrong state ! Wrong state
idx = -1 idx = -1
info = -1 info = -1
end if end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -876,6 +1262,8 @@ contains
end subroutine hash_g2lv1_ins end subroutine hash_g2lv1_ins
! ################## END THESIS #########################
subroutine hash_g2lv2_ins(idxin,idxout,idxmap,info,mask,lidx) subroutine hash_g2lv2_ins(idxin,idxout,idxmap,info,mask,lidx)
use psb_realloc_mod use psb_realloc_mod
implicit none implicit none

@ -1866,6 +1866,30 @@ module psb_c_base_mat_mod
end subroutine psb_c_fix_coo_inner end subroutine psb_c_fix_coo_inner
end interface end interface
interface
subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_colmajor
end interface
interface
subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_rowmajor
end interface
! !
!> Function fix_coo !> Function fix_coo
!! \memberof psb_c_coo_sparse_mat !! \memberof psb_c_coo_sparse_mat

@ -1208,7 +1208,7 @@ contains
if (beta == cone) then if (beta == cone) then
return return
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1226,7 +1226,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
@ -1243,24 +1243,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == czero) then if (beta == czero) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == cone) then else if (beta == cone) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha, beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do

@ -1866,6 +1866,30 @@ module psb_d_base_mat_mod
end subroutine psb_d_fix_coo_inner end subroutine psb_d_fix_coo_inner
end interface end interface
interface
subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_colmajor
end interface
interface
subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_rowmajor
end interface
! !
!> Function fix_coo !> Function fix_coo
!! \memberof psb_d_coo_sparse_mat !! \memberof psb_d_coo_sparse_mat

@ -1215,7 +1215,7 @@ contains
if (beta == done) then if (beta == done) then
return return
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1233,7 +1233,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
@ -1250,24 +1250,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == dzero) then if (beta == dzero) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == done) then else if (beta == done) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha, beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do

@ -1866,6 +1866,30 @@ module psb_s_base_mat_mod
end subroutine psb_s_fix_coo_inner end subroutine psb_s_fix_coo_inner
end interface end interface
interface
subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_colmajor
end interface
interface
subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_rowmajor
end interface
! !
!> Function fix_coo !> Function fix_coo
!! \memberof psb_s_coo_sparse_mat !! \memberof psb_s_coo_sparse_mat

@ -1215,7 +1215,7 @@ contains
if (beta == sone) then if (beta == sone) then
return return
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1233,7 +1233,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
@ -1250,24 +1250,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == szero) then if (beta == szero) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == sone) then else if (beta == sone) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha, beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do

@ -1866,6 +1866,30 @@ module psb_z_base_mat_mod
end subroutine psb_z_fix_coo_inner end subroutine psb_z_fix_coo_inner
end interface end interface
interface
subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_colmajor
end interface
interface
subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_rowmajor
end interface
! !
!> Function fix_coo !> Function fix_coo
!! \memberof psb_z_coo_sparse_mat !! \memberof psb_z_coo_sparse_mat

@ -1208,7 +1208,7 @@ contains
if (beta == zone) then if (beta == zone) then
return return
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) z%v(i) = beta*z%v(i)
end do end do
@ -1226,7 +1226,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i) z%v(i) = z%v(i) + y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i) z%v(i) = beta*z%v(i) + y(i)*x(i)
end do end do
@ -1243,24 +1243,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i) z%v(i) = z%v(i) - y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i) z%v(i) = beta*z%v(i) - y(i)*x(i)
end do end do
end if end if
else else
if (beta == zzero) then if (beta == zzero) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = alpha*y(i)*x(i) z%v(i) = alpha*y(i)*x(i)
end do end do
else if (beta == zone) then else if (beta == zone) then
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha)
do i=1, n do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i) z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do end do
else else
!$omp parallel do private(i) !$omp parallel do private(i) shared(alpha, beta)
do i=1, n do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i) z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do end do

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_c_csr_csnmi(a) result(res)
res = szero res = szero
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res) !$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows() do i = 1, a%get_nrows()
acc = szero acc = szero
do j=a%irp(i),a%irp(i+1)-1 do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j)) acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_c_csr_get_diag(a,d,info)
if (a%is_unit()) then if (a%is_unit()) then
d(1:mnm) = cone !$omp parallel do private(i)
do i=1, mnm
d(i) = cone
end do
else else
!$omp parallel do private(i,j,k)
do i=1, mnm do i=1, mnm
d(i) = czero d(i) = czero
do k=a%irp(i),a%irp(i+1)-1 do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_c_csr_get_diag(a,d,info)
enddo enddo
end do end do
end if end if
!$omp parallel do private(i)
do i=mnm+1,size(d) do i=mnm+1,size(d)
d(i) = czero d(i) = czero
end do end do
@ -1629,6 +1634,7 @@ subroutine psb_c_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1, m do i=1, m
do j = a%irp(i), a%irp(i+1) -1 do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i) a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_c_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_c_csr_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -2851,6 +2859,13 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='c_cp_csr_from_coo' character(len=20) :: name='c_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val) call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free() call tmp%free()
else else
@ -2889,22 +2904,97 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info) call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif endif
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$ maxthreads = omp_get_max_threads()
end do !!$ !$ allocate(sum(maxthreads+1))
ip = 1 !!$ !$ sum(:) = 0
do i=1,nr !!$ !$ sum(1) = 1
ncl = a%irp(i) !!$
a%irp(i) = ip !!$ !$OMP PARALLEL default(none) &
ip = ip + ncl !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
end do !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
a%irp(nr+1) = ip !!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp) call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja) call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val) call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$OMP PARALLEL default(none) &
end do !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
ip = 1 !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
do i=1,nr !!$
ncl = a%irp(i) !!$ !$OMP DO schedule(STATIC) &
a%irp(i) = ip !!$ !$OMP private(k,i)
ip = ip + ncl !!$ do k=1,nza
end do !!$ i = itemp(k)
a%irp(nr+1) = ip !!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
end subroutine psb_c_mv_csr_from_coo end subroutine psb_c_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_d_csr_csnmi(a) result(res)
res = dzero res = dzero
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res) !$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows() do i = 1, a%get_nrows()
acc = dzero acc = dzero
do j=a%irp(i),a%irp(i+1)-1 do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j)) acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_d_csr_get_diag(a,d,info)
if (a%is_unit()) then if (a%is_unit()) then
d(1:mnm) = done !$omp parallel do private(i)
do i=1, mnm
d(i) = done
end do
else else
!$omp parallel do private(i,j,k)
do i=1, mnm do i=1, mnm
d(i) = dzero d(i) = dzero
do k=a%irp(i),a%irp(i+1)-1 do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_d_csr_get_diag(a,d,info)
enddo enddo
end do end do
end if end if
!$omp parallel do private(i)
do i=mnm+1,size(d) do i=mnm+1,size(d)
d(i) = dzero d(i) = dzero
end do end do
@ -1629,6 +1634,7 @@ subroutine psb_d_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1, m do i=1, m
do j = a%irp(i), a%irp(i+1) -1 do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i) a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_d_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_d_csr_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -2851,6 +2859,13 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='d_cp_csr_from_coo' character(len=20) :: name='d_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val) call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free() call tmp%free()
else else
@ -2889,22 +2904,97 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info) call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif endif
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$ maxthreads = omp_get_max_threads()
end do !!$ !$ allocate(sum(maxthreads+1))
ip = 1 !!$ !$ sum(:) = 0
do i=1,nr !!$ !$ sum(1) = 1
ncl = a%irp(i) !!$
a%irp(i) = ip !!$ !$OMP PARALLEL default(none) &
ip = ip + ncl !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
end do !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
a%irp(nr+1) = ip !!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp) call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja) call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val) call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$OMP PARALLEL default(none) &
end do !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
ip = 1 !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
do i=1,nr !!$
ncl = a%irp(i) !!$ !$OMP DO schedule(STATIC) &
a%irp(i) = ip !!$ !$OMP private(k,i)
ip = ip + ncl !!$ do k=1,nza
end do !!$ i = itemp(k)
a%irp(nr+1) = ip !!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
end subroutine psb_d_mv_csr_from_coo end subroutine psb_d_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_s_csr_csnmi(a) result(res)
res = szero res = szero
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res) !$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows() do i = 1, a%get_nrows()
acc = szero acc = szero
do j=a%irp(i),a%irp(i+1)-1 do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j)) acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_s_csr_get_diag(a,d,info)
if (a%is_unit()) then if (a%is_unit()) then
d(1:mnm) = sone !$omp parallel do private(i)
do i=1, mnm
d(i) = sone
end do
else else
!$omp parallel do private(i,j,k)
do i=1, mnm do i=1, mnm
d(i) = szero d(i) = szero
do k=a%irp(i),a%irp(i+1)-1 do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_s_csr_get_diag(a,d,info)
enddo enddo
end do end do
end if end if
!$omp parallel do private(i)
do i=mnm+1,size(d) do i=mnm+1,size(d)
d(i) = szero d(i) = szero
end do end do
@ -1629,6 +1634,7 @@ subroutine psb_s_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1, m do i=1, m
do j = a%irp(i), a%irp(i+1) -1 do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i) a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_s_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_s_csr_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -2851,6 +2859,13 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='s_cp_csr_from_coo' character(len=20) :: name='s_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val) call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free() call tmp%free()
else else
@ -2889,22 +2904,97 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info) call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif endif
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$ maxthreads = omp_get_max_threads()
end do !!$ !$ allocate(sum(maxthreads+1))
ip = 1 !!$ !$ sum(:) = 0
do i=1,nr !!$ !$ sum(1) = 1
ncl = a%irp(i) !!$
a%irp(i) = ip !!$ !$OMP PARALLEL default(none) &
ip = ip + ncl !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
end do !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
a%irp(nr+1) = ip !!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp) call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja) call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val) call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$OMP PARALLEL default(none) &
end do !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
ip = 1 !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
do i=1,nr !!$
ncl = a%irp(i) !!$ !$OMP DO schedule(STATIC) &
a%irp(i) = ip !!$ !$OMP private(k,i)
ip = ip + ncl !!$ do k=1,nza
end do !!$ i = itemp(k)
a%irp(nr+1) = ip !!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
end subroutine psb_s_mv_csr_from_coo end subroutine psb_s_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_z_csr_csnmi(a) result(res)
res = dzero res = dzero
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res) !$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows() do i = 1, a%get_nrows()
acc = dzero acc = dzero
do j=a%irp(i),a%irp(i+1)-1 do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j)) acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_z_csr_get_diag(a,d,info)
if (a%is_unit()) then if (a%is_unit()) then
d(1:mnm) = zone !$omp parallel do private(i)
do i=1, mnm
d(i) = zone
end do
else else
!$omp parallel do private(i,j,k)
do i=1, mnm do i=1, mnm
d(i) = zzero d(i) = zzero
do k=a%irp(i),a%irp(i+1)-1 do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_z_csr_get_diag(a,d,info)
enddo enddo
end do end do
end if end if
!$omp parallel do private(i)
do i=mnm+1,size(d) do i=mnm+1,size(d)
d(i) = zzero d(i) = zzero
end do end do
@ -1629,6 +1634,7 @@ subroutine psb_z_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1, m do i=1, m
do j = a%irp(i), a%irp(i+1) -1 do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i) a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_z_csr_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_z_csr_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -2851,6 +2859,13 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='z_cp_csr_from_coo' character(len=20) :: name='z_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp) call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja) call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val) call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free() call tmp%free()
else else
@ -2889,22 +2904,97 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info) call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info) if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info) if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info) if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif endif
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$ maxthreads = omp_get_max_threads()
end do !!$ !$ allocate(sum(maxthreads+1))
ip = 1 !!$ !$ sum(:) = 0
do i=1,nr !!$ !$ sum(1) = 1
ncl = a%irp(i) !!$
a%irp(i) = ip !!$ !$OMP PARALLEL default(none) &
ip = ip + ncl !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
end do !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
a%irp(nr+1) = ip !!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp) call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja) call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val) call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0 a%irp(:) = 0
do k=1,nza
i = itemp(k) !!$ if (use_openmp) then
a%irp(i) = a%irp(i) + 1 !!$ !$OMP PARALLEL default(none) &
end do !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
ip = 1 !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
do i=1,nr !!$
ncl = a%irp(i) !!$ !$OMP DO schedule(STATIC) &
a%irp(i) = ip !!$ !$OMP private(k,i)
ip = ip + ncl !!$ do k=1,nza
end do !!$ i = itemp(k)
a%irp(nr+1) = ip !!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host() call a%set_host()
end subroutine psb_z_mv_csr_from_coo end subroutine psb_z_mv_csr_from_coo

@ -244,36 +244,38 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash)
call psb_errpush(info,name,l_err=l_err) call psb_errpush(info,name,l_err=l_err)
goto 9999 goto 9999
end if end if
if (check_) then
! Sort, eliminate duplicates, then ! Sort, eliminate duplicates, then
! scramble back into original position. ! scramble back into original position.
ix(1) = -1 ix(1) = -1
if (present(idx)) then if (present(idx)) then
if (size(idx) >= loc_row) then if (size(idx) >= loc_row) then
!$omp parallel do private(i)
do i=1, loc_row
ix(i) = idx(i)
end do
end if
end if
if (ix(1) == -1) then
!$omp parallel do private(i)
do i=1, loc_row do i=1, loc_row
ix(i) = idx(i) ix(i) = i
end do end do
end if end if
end if call psb_msort(vl,ix,flag=psb_sort_keep_idx_)
if (ix(1) == -1) then nlu = min(1,loc_row)
do i=1, loc_row do i=2,loc_row
ix(i) = i if (vl(i) /= vl(nlu)) then
nlu = nlu + 1
vl(nlu) = vl(i)
ix(nlu) = ix(i)
end if
end do end do
end if call psb_msort(ix(1:nlu),vl(1:nlu),flag=psb_sort_keep_idx_)
call psb_msort(vl,ix,flag=psb_sort_keep_idx_)
nlu = min(1,loc_row)
do i=2,loc_row
if (vl(i) /= vl(nlu)) then
nlu = nlu + 1
vl(nlu) = vl(i)
ix(nlu) = ix(i)
end if
end do
call psb_msort(ix(1:nlu),vl(1:nlu),flag=psb_sort_keep_idx_)
if (debug_size) &
& write(debug_unit,*) me,' ',trim(name),': After sort ',nlu
if (debug_size) &
& write(debug_unit,*) me,' ',trim(name),': After sort ',nlu
end if
call psb_nullify_desc(desc) call psb_nullify_desc(desc)
if (do_timings) then if (do_timings) then
call psb_barrier(ctxt) call psb_barrier(ctxt)

@ -1,3 +1,39 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_cdall.f90
!
! Subroutine: psb_cdall
! Allocate descriptor Outer routine
!
subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,& subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
& vg,vl,flag,nl,repl,globalcheck,lidx,usehash) & vg,vl,flag,nl,repl,globalcheck,lidx,usehash)
use psb_desc_mod use psb_desc_mod
@ -62,14 +98,15 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
logical :: usehash_ logical :: usehash_
integer(psb_ipk_), allocatable :: itmpv(:) integer(psb_ipk_), allocatable :: itmpv(:)
integer(psb_lpk_), allocatable :: lvl(:) integer(psb_lpk_), allocatable :: lvl(:)
logical, parameter :: timings=.false.
real(psb_dpk_) :: t0, t1
if (psb_get_errstatus() /= 0) return if (psb_get_errstatus() /= 0) return
info=psb_success_ info=psb_success_
name = 'psb_cdall' name = 'psb_cdall'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (timings) t0 = psb_wtime()
call psb_info(ctxt, me, np) call psb_info(ctxt, me, np)
if (count((/ present(vg),present(vl),& if (count((/ present(vg),present(vl),&
& present(parts),present(nl), present(repl) /)) < 1) then & present(parts),present(nl), present(repl) /)) < 1) then
@ -189,7 +226,11 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
endif endif
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
if (timings) then
t1 = psb_wtime()
write(0,*) name,' 1st phase:',t1-t0
t0 = psb_wtime()
end if
! Finish off ! Finish off
lr = desc%indxmap%get_lr() lr = desc%indxmap%get_lr()
call psb_realloc(max(1,lr/2),desc%halo_index, info) call psb_realloc(max(1,lr/2),desc%halo_index, info)
@ -203,6 +244,11 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
desc%ext_index(:) = -1 desc%ext_index(:) = -1
call psb_cd_set_bld(desc,info) call psb_cd_set_bld(desc,info)
if (info /= psb_success_) goto 9999 if (info /= psb_success_) goto 9999
if (timings) then
t1 = psb_wtime()
write(0,*) name,' 2nd phase:',t1-t0
t0 = psb_wtime()
end if
9998 continue 9998 continue
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -151,6 +151,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag)
itmpov = 0 itmpov = 0
temp_ovrlap(:) = -1 temp_ovrlap(:) = -1
!$omp parallel do private(i) reduction(+:counter)
do i=1,m do i=1,m
if (((v(i)-flag_) > np-1).or.((v(i)-flag_) < 0)) then if (((v(i)-flag_) > np-1).or.((v(i)-flag_) < 0)) then
@ -158,7 +159,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag)
l_err(1)=3 l_err(1)=3
l_err(2)=v(i) - flag_ l_err(2)=v(i) - flag_
l_err(3)=i l_err(3)=i
exit !exit
end if end if
if ((v(i)-flag_) == me) then if ((v(i)-flag_) == me) then

@ -239,7 +239,7 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_c_base_sparse_mat), intent(in), optional :: amold class(psb_c_base_sparse_mat), intent(in), optional :: amold
class(psb_c_base_vect_type), intent(in), optional :: vmold class(psb_c_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='c_diag_precbld' character(len=20) :: name='c_diag_precbld'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_ info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt()) call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info) prec%d=a%get_diag(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999 goto 9999
end if end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=cone) call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow do i=1,nrow
if (prec%d(i) == dzero) then if (prec%d(i) == dzero) then
prec%d(i) = cone prec%d(i) = cone
@ -263,6 +265,10 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = cone/prec%d(i) prec%d(i) = cone/prec%d(i)
endif endif
end do end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = cone
end do
allocate(prec%dv,stat=info) allocate(prec%dv,stat=info)
if (info == 0) then if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_d_base_sparse_mat), intent(in), optional :: amold class(psb_d_base_sparse_mat), intent(in), optional :: amold
class(psb_d_base_vect_type), intent(in), optional :: vmold class(psb_d_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='d_diag_precbld' character(len=20) :: name='d_diag_precbld'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_ info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt()) call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info) prec%d=a%get_diag(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999 goto 9999
end if end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=done) call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow do i=1,nrow
if (prec%d(i) == dzero) then if (prec%d(i) == dzero) then
prec%d(i) = done prec%d(i) = done
@ -263,6 +265,10 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = done/prec%d(i) prec%d(i) = done/prec%d(i)
endif endif
end do end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = done
end do
allocate(prec%dv,stat=info) allocate(prec%dv,stat=info)
if (info == 0) then if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_s_base_sparse_mat), intent(in), optional :: amold class(psb_s_base_sparse_mat), intent(in), optional :: amold
class(psb_s_base_vect_type), intent(in), optional :: vmold class(psb_s_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='s_diag_precbld' character(len=20) :: name='s_diag_precbld'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_ info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt()) call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info) prec%d=a%get_diag(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999 goto 9999
end if end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=sone) call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow do i=1,nrow
if (prec%d(i) == dzero) then if (prec%d(i) == dzero) then
prec%d(i) = sone prec%d(i) = sone
@ -263,6 +265,10 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = sone/prec%d(i) prec%d(i) = sone/prec%d(i)
endif endif
end do end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = sone
end do
allocate(prec%dv,stat=info) allocate(prec%dv,stat=info)
if (info == 0) then if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_z_base_sparse_mat), intent(in), optional :: amold class(psb_z_base_sparse_mat), intent(in), optional :: amold
class(psb_z_base_vect_type), intent(in), optional :: vmold class(psb_z_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='z_diag_precbld' character(len=20) :: name='z_diag_precbld'
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_ info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt()) call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info) prec%d=a%get_diag(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999 goto 9999
end if end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=zone) call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow do i=1,nrow
if (prec%d(i) == dzero) then if (prec%d(i) == dzero) then
prec%d(i) = zone prec%d(i) = zone
@ -263,6 +265,10 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = zone/prec%d(i) prec%d(i) = zone/prec%d(i)
endif endif
end do end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = zone
end do
allocate(prec%dv,stat=info) allocate(prec%dv,stat=info)
if (info == 0) then if (info == 0) then

@ -94,7 +94,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine idx2ijk3d(i,j,k,idx,nx,ny,nz,base) pure subroutine idx2ijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: i,j,k integer(psb_mpk_), intent(out) :: i,j,k
@ -111,7 +111,7 @@ contains
end subroutine idx2ijk3d end subroutine idx2ijk3d
subroutine idx2ijk2d(i,j,idx,nx,ny,base) pure subroutine idx2ijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: i,j integer(psb_mpk_), intent(out) :: i,j
@ -139,7 +139,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine idx2ijkv(coords,idx,dims,base) pure subroutine idx2ijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: coords(:) integer(psb_mpk_), intent(out) :: coords(:)
@ -156,7 +156,7 @@ contains
idx_ = idx - base_ idx_ = idx - base_
if (size(coords) < size(dims)) then if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0 coords = 0
return return
end if end if
@ -181,7 +181,7 @@ contains
end subroutine idx2ijkv end subroutine idx2ijkv
subroutine lidx2ijk3d(i,j,k,idx,nx,ny,nz,base) pure subroutine lidx2ijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: i,j,k integer(psb_mpk_), intent(out) :: i,j,k
@ -199,7 +199,7 @@ contains
end subroutine lidx2ijk3d end subroutine lidx2ijk3d
subroutine lidx2ijk2d(i,j,idx,nx,ny,base) pure subroutine lidx2ijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: i,j integer(psb_mpk_), intent(out) :: i,j
@ -228,7 +228,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine lidx2ijkv(coords,idx,dims,base) pure subroutine lidx2ijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: coords(:) integer(psb_mpk_), intent(out) :: coords(:)
@ -247,7 +247,7 @@ contains
idx_ = idx - base_ idx_ = idx - base_
if (size(coords) < size(dims)) then if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0 coords = 0
return return
end if end if
@ -272,7 +272,7 @@ contains
end subroutine lidx2ijkv end subroutine lidx2ijkv
subroutine lidx2lijk3d(i,j,k,idx,nx,ny,nz,base) pure subroutine lidx2lijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: i,j,k integer(psb_epk_), intent(out) :: i,j,k
@ -290,7 +290,7 @@ contains
end subroutine lidx2lijk3d end subroutine lidx2lijk3d
subroutine lidx2lijk2d(i,j,idx,nx,ny,base) pure subroutine lidx2lijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: i,j integer(psb_epk_), intent(out) :: i,j
@ -319,7 +319,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine lidx2lijkv(coords,idx,dims,base) pure subroutine lidx2lijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: coords(:) integer(psb_epk_), intent(out) :: coords(:)
@ -338,7 +338,7 @@ contains
idx_ = idx - base_ idx_ = idx - base_
if (size(coords) < size(dims)) then if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0 coords = 0
return return
end if end if
@ -374,7 +374,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine ijk2idxv(idx,coords,dims,base) pure subroutine ijk2idxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(in) :: coords(:),dims(:) integer(psb_mpk_), intent(in) :: coords(:),dims(:)
@ -389,7 +389,7 @@ contains
end if end if
sz = size(coords) sz = size(coords)
if (sz /= size(dims)) then if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0 idx = 0
return return
end if end if
@ -420,7 +420,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine ijk2idx3d(idx,i,j,k,nx,ny,nz,base) pure subroutine ijk2idx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: idx integer(psb_mpk_), intent(out) :: idx
@ -431,7 +431,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base) call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine ijk2idx3d end subroutine ijk2idx3d
subroutine ijk2idx2d(idx,i,j,nx,ny,base) pure subroutine ijk2idx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(out) :: idx integer(psb_mpk_), intent(out) :: idx
@ -442,7 +442,7 @@ contains
call ijk2idx(idx,[i,j],[nx,ny],base) call ijk2idx(idx,[i,j],[nx,ny],base)
end subroutine ijk2idx2d end subroutine ijk2idx2d
subroutine ijk2lidxv(idx,coords,dims,base) pure subroutine ijk2lidxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_mpk_), intent(in) :: coords(:),dims(:) integer(psb_mpk_), intent(in) :: coords(:),dims(:)
@ -457,7 +457,7 @@ contains
end if end if
sz = size(coords) sz = size(coords)
if (sz /= size(dims)) then if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0 idx = 0
return return
end if end if
@ -489,7 +489,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine ijk2lidx3d(idx,i,j,k,nx,ny,nz,base) pure subroutine ijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: idx integer(psb_epk_), intent(out) :: idx
@ -500,7 +500,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base) call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine ijk2lidx3d end subroutine ijk2lidx3d
subroutine ijk2lidx2d(idx,i,j,nx,ny,base) pure subroutine ijk2lidx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: idx integer(psb_epk_), intent(out) :: idx
@ -512,7 +512,7 @@ contains
end subroutine ijk2lidx2d end subroutine ijk2lidx2d
subroutine lijk2lidxv(idx,coords,dims,base) pure subroutine lijk2lidxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(in) :: coords(:),dims(:) integer(psb_epk_), intent(in) :: coords(:),dims(:)
@ -527,7 +527,7 @@ contains
end if end if
sz = size(coords) sz = size(coords)
if (sz /= size(dims)) then if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims) !write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0 idx = 0
return return
end if end if
@ -559,7 +559,7 @@ contains
! do k=1,nz ! do k=1,nz
! ijk2idx(i,j,k) = idx ! ijk2idx(i,j,k) = idx
! idx = idx + 1 ! idx = idx + 1
subroutine lijk2lidx3d(idx,i,j,k,nx,ny,nz,base) pure subroutine lijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: idx integer(psb_epk_), intent(out) :: idx
@ -570,7 +570,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base) call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine lijk2lidx3d end subroutine lijk2lidx3d
subroutine lijk2lidx2d(idx,i,j,nx,ny,base) pure subroutine lijk2lidx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none implicit none
integer(psb_epk_), intent(out) :: idx integer(psb_epk_), intent(out) :: idx

Loading…
Cancel
Save