Merge branch 'dev-openmp' into development

master
Salvatore Filippone 2 years ago
commit 61d054850d

@ -131,7 +131,7 @@ Contains
! ...Local Variables
complex(psb_spk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
complex(psb_spk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_c_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
real(psb_dpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
real(psb_dpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_d_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
integer(psb_epk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
integer(psb_epk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_e_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
integer(psb_i2pk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
integer(psb_i2pk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_i2_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
integer(psb_mpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
integer(psb_mpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_m_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
real(psb_spk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
real(psb_spk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_s_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -131,7 +131,7 @@ Contains
! ...Local Variables
complex(psb_dpk_),allocatable :: tmp(:)
integer(psb_mpk_) :: dim, lb_, lbi,ub_
integer(psb_mpk_) :: dim, lb_, lbi,ub_, i
integer(psb_ipk_) :: err_act,err
character(len=30) :: name
logical, parameter :: debug=.false.
@ -179,7 +179,10 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb_-1+dim+1:lb_-1+len) = pad
!$omp parallel do private(i) shared(dim,len)
do i=lb_-1+dim+1,lb_-1+len
rrax(i) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -204,7 +207,7 @@ Contains
complex(psb_dpk_),allocatable :: tmp(:,:)
integer(psb_ipk_) :: err_act,err
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2
integer(psb_mpk_) :: dim,dim2,lb1_, lb2_, ub1_, ub2_,lbi1, lbi2, i
character(len=30) :: name
name='psb_r_m_z_rk2'
@ -267,8 +270,14 @@ Contains
end if
endif
if (present(pad)) then
rrax(lb1_-1+dim+1:lb1_-1+len1,:) = pad
rrax(lb1_:lb1_-1+dim,lb2_-1+dim2+1:lb2_-1+len2) = pad
!$omp parallel do private(i) shared(lb1_,dim,len1)
do i=lb1_-1+dim+1,lb1_-1+len1
rrax(i,:) = pad
end do
!$omp parallel do private(i) shared(lb1_,dim,len1,lb2_,dim2,len2)
do i=lb1_,lb1_-1+len1
rrax(i,lb2_-1+dim2+1:lb2_-1+len2) = pad
end do
endif
call psb_erractionrestore(err_act)
return
@ -982,7 +991,48 @@ Contains
goto 9999
end if
!!$ If (len > psb_size(v)) Then
!!$ if (present(newsz)) then
!!$ isz = (max(len+1,newsz))
!!$ else
!!$ if (present(addsz)) then
!!$ isz = len+max(1,addsz)
!!$ else
!!$ isz = max(len+10, int(1.25*len))
!!$ endif
!!$ endif
!!$
!!$ call psb_realloc(isz,v,info,pad=pad)
!!$ if (info /= psb_success_) then
!!$ info=psb_err_from_subroutine_
!!$ call psb_errpush(info,name,a_err='psb_realloc')
!!$ goto 9999
!!$ End If
!!$ end If
If (len > psb_size(v)) Then
#if defined(OPENMP)
!$OMP CRITICAL
if (len > psb_size(v)) then
if (present(newsz)) then
isz = (max(len+1,newsz))
else
if (present(addsz)) then
isz = len+max(1,addsz)
else
isz = max(len+10, int(1.25*len))
endif
endif
call psb_realloc(isz,v,info,pad=pad)
end if
!$OMP END CRITICAL
if (info /= psb_success_) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
end if
#else
if (present(newsz)) then
isz = (max(len+1,newsz))
else
@ -999,6 +1049,7 @@ Contains
call psb_errpush(info,name,a_err='psb_realloc')
goto 9999
End If
#endif
end If
call psb_erractionrestore(err_act)

@ -629,13 +629,17 @@ contains
end subroutine hash_g2ls2_ins
! #################### THESIS ####################
subroutine hash_g2lv1_ins(idx,idxmap,info,mask,lidx)
use psb_error_mod
use psb_realloc_mod
use psb_sort_mod
use psb_penv_mod
!$ use omp_lib
implicit none
class(psb_hash_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(inout) :: idx(:)
integer(psb_ipk_), intent(out) :: info
@ -648,6 +652,12 @@ contains
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np
character(len=20) :: name,ch_err
logical :: use_openmp = .false.
logical, volatile :: isLoopValid
!$ integer(kind = OMP_lock_kind) :: ins_lck
!$ use_openmp = .true.
info = psb_success_
name = 'hash_g2l_ins'
@ -664,6 +674,7 @@ contains
return
end if
end if
if (present(lidx)) then
if (size(lidx) < size(idx)) then
info = -1
@ -671,37 +682,277 @@ contains
end if
end if
mglob = idxmap%get_gr()
nrow = idxmap%get_lr()
if (idxmap%is_bld()) then
if (use_openmp) then
!$ call OMP_init_lock(ins_lck)
isLoopValid = .true.
ncol = idxmap%get_lc()
end if
if (present(lidx)) then
if (present(mask)) then
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,mask,lidx,idx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap read/write.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
!$ call OMP_unset_lock(ins_lck)
end if
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if
else
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == tlip) then
ncol = max(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
end if
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
enddo
end if
else if (.not.present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,lidx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
! In OMP logic the index research limit is turned off. It is
! a correct way to fit the subroutine?
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! Locking system to handle concurrent write/access. Under checking!
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
!$ call OMP_unset_lock(ins_lck)
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
! Under checking!
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
!$ call OMP_unset_lock(ins_lck)
end if
end if
idx(i) = lip
info = psb_success_
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if
else
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then
if (nxt == tlip) then
ncol = max(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (nxt == lip) then
ncol = max(nxt,ncol)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
goto 9999
@ -718,64 +969,238 @@ contains
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
enddo
end if
end if
else if (.not.present(lidx)) then
if (present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,mask,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap write/access.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
else
!$ call OMP_unset_lock(ins_lck)
end if
idx(i) = lip
info = psb_success_
end if
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
cycle
end if
else
idx(i) = -1
end if
end do
!$OMP END PARALLEL DO
call idxmap%set_lc(ncol)
if (.not. isLoopValid) then
goto 9999
end if
enddo
else
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
end if
if (info >=0) then
if (nxt == lip) then
ncol = nxt
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
enddo
end if
else if (.not.present(mask)) then
if (use_openmp) then
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(name,is,idx,mglob,idxmap,ncol,nrow,ins_lck,laddsz) &
!$OMP private(i,ip,lip,tlip,nxt,info) &
!$OMP reduction(.AND.:isLoopValid)
do i = 1, is
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
nxt = lidx(i)
if (nxt <= nrow) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then
if (nxt == lip) then
ncol = max(nxt,ncol)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
!$ call OMP_set_lock(ins_lck)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
! Index not found
if (lip < 0) then
! Locking system to handle concurrent hashmap write/access.
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
else
!$ call OMP_unset_lock(ins_lck)
end if
idx(i) = lip
info = psb_success_
end if
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
!$ call OMP_unset_lock(ins_lck)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
goto 9999
&a_err='psb_ensure_size',i_err=(/info/))
!$ isLoopValid = .false.
cycle
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
idxmap%loc_to_glob(nxt) = ip
else
!$ call OMP_unset_lock(ins_lck)
end if
info = psb_success_
else
!$ call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
goto 9999
isLoopValid = .false.
cycle
end if
end if
idx(i) = lip
info = psb_success_
enddo
end do
!$OMP END PARALLEL DO
end if
call idxmap%set_lc(ncol)
else if (.not.present(lidx)) then
if (.not. isLoopValid) then
goto 9999
end if
if (present(mask)) then
do i = 1, is
ncol = idxmap%get_lc()
if (mask(i)) then
else
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
@ -796,8 +1221,9 @@ contains
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
ch_err='psb_ensure_size'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
&a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
@ -805,68 +1231,28 @@ contains
endif
info = psb_success_
else
ch_err='SearchInsKeyVal'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
& a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
enddo
else if (.not.present(mask)) then
do i = 1, is
ncol = idxmap%get_lc()
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip < 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
end if
if (info >=0) then
if (nxt == lip) then
ncol = nxt
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
ch_err='psb_ensure_size'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
endif
info = psb_success_
else
ch_err='SearchInsKeyVal'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err=ch_err,i_err=(/info,izero,izero,izero,izero/))
goto 9999
end if
idx(i) = lip
info = psb_success_
enddo
enddo
end if
end if
end if
if (use_openmp) then
!$ call OMP_destroy_lock(ins_lck)
end if
else
! Wrong state
idx = -1
info = -1
end if
call psb_erractionrestore(err_act)
return
@ -876,6 +1262,8 @@ contains
end subroutine hash_g2lv1_ins
! ################## END THESIS #########################
subroutine hash_g2lv2_ins(idxin,idxout,idxmap,info,mask,lidx)
use psb_realloc_mod
implicit none

@ -1866,6 +1866,30 @@ module psb_c_base_mat_mod
end subroutine psb_c_fix_coo_inner
end interface
interface
subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_colmajor
end interface
interface
subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_rowmajor
end interface
!
!> Function fix_coo
!! \memberof psb_c_coo_sparse_mat

@ -1208,7 +1208,7 @@ contains
if (beta == cone) then
return
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i)
end do
@ -1226,7 +1226,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i)
end do
@ -1243,24 +1243,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i)
end do
end if
else
if (beta == czero) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = alpha*y(i)*x(i)
end do
else if (beta == cone) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha, beta)
do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do

@ -1866,6 +1866,30 @@ module psb_d_base_mat_mod
end subroutine psb_d_fix_coo_inner
end interface
interface
subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_colmajor
end interface
interface
subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_rowmajor
end interface
!
!> Function fix_coo
!! \memberof psb_d_coo_sparse_mat

@ -1215,7 +1215,7 @@ contains
if (beta == done) then
return
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i)
end do
@ -1233,7 +1233,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i)
end do
@ -1250,24 +1250,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i)
end do
end if
else
if (beta == dzero) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = alpha*y(i)*x(i)
end do
else if (beta == done) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha, beta)
do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do

@ -1866,6 +1866,30 @@ module psb_s_base_mat_mod
end subroutine psb_s_fix_coo_inner
end interface
interface
subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_colmajor
end interface
interface
subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_rowmajor
end interface
!
!> Function fix_coo
!! \memberof psb_s_coo_sparse_mat

@ -1215,7 +1215,7 @@ contains
if (beta == sone) then
return
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i)
end do
@ -1233,7 +1233,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i)
end do
@ -1250,24 +1250,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i)
end do
end if
else
if (beta == szero) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = alpha*y(i)*x(i)
end do
else if (beta == sone) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha, beta)
do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do

@ -1866,6 +1866,30 @@ module psb_z_base_mat_mod
end subroutine psb_z_fix_coo_inner
end interface
interface
subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_colmajor
end interface
interface
subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_rowmajor
end interface
!
!> Function fix_coo
!! \memberof psb_z_coo_sparse_mat

@ -1208,7 +1208,7 @@ contains
if (beta == zone) then
return
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i)
end do
@ -1226,7 +1226,7 @@ contains
z%v(i) = z%v(i) + y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) + y(i)*x(i)
end do
@ -1243,24 +1243,24 @@ contains
z%v(i) = z%v(i) - y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(beta)
do i=1, n
z%v(i) = beta*z%v(i) - y(i)*x(i)
end do
end if
else
if (beta == zzero) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = alpha*y(i)*x(i)
end do
else if (beta == zone) then
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha)
do i=1, n
z%v(i) = z%v(i) + alpha*y(i)*x(i)
end do
else
!$omp parallel do private(i)
!$omp parallel do private(i) shared(alpha, beta)
do i=1, n
z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
end do

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_c_csr_csnmi(a) result(res)
res = szero
if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res)
do i = 1, a%get_nrows()
!$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows()
acc = szero
do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_c_csr_get_diag(a,d,info)
if (a%is_unit()) then
d(1:mnm) = cone
!$omp parallel do private(i)
do i=1, mnm
d(i) = cone
end do
else
!$omp parallel do private(i,j,k)
do i=1, mnm
d(i) = czero
do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_c_csr_get_diag(a,d,info)
enddo
end do
end if
!$omp parallel do private(i)
do i=mnm+1,size(d)
d(i) = czero
end do
@ -1629,6 +1634,7 @@ subroutine psb_c_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1, m
do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_c_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros()
j = a%ja(i)
a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_c_csr_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -2851,6 +2859,13 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='c_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free()
else
@ -2889,22 +2904,97 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info)
if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_c_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_d_csr_csnmi(a) result(res)
res = dzero
if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res)
do i = 1, a%get_nrows()
!$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows()
acc = dzero
do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_d_csr_get_diag(a,d,info)
if (a%is_unit()) then
d(1:mnm) = done
!$omp parallel do private(i)
do i=1, mnm
d(i) = done
end do
else
!$omp parallel do private(i,j,k)
do i=1, mnm
d(i) = dzero
do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_d_csr_get_diag(a,d,info)
enddo
end do
end if
!$omp parallel do private(i)
do i=mnm+1,size(d)
d(i) = dzero
end do
@ -1629,6 +1634,7 @@ subroutine psb_d_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1, m
do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_d_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros()
j = a%ja(i)
a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_d_csr_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -2851,6 +2859,13 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='d_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free()
else
@ -2889,22 +2904,97 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info)
if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_d_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_s_csr_csnmi(a) result(res)
res = szero
if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res)
do i = 1, a%get_nrows()
!$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows()
acc = szero
do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_s_csr_get_diag(a,d,info)
if (a%is_unit()) then
d(1:mnm) = sone
!$omp parallel do private(i)
do i=1, mnm
d(i) = sone
end do
else
!$omp parallel do private(i,j,k)
do i=1, mnm
d(i) = szero
do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_s_csr_get_diag(a,d,info)
enddo
end do
end if
!$omp parallel do private(i)
do i=mnm+1,size(d)
d(i) = szero
end do
@ -1629,6 +1634,7 @@ subroutine psb_s_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1, m
do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_s_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros()
j = a%ja(i)
a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_s_csr_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -2851,6 +2859,13 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='s_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free()
else
@ -2889,22 +2904,97 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info)
if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_s_mv_csr_from_coo

File diff suppressed because it is too large Load Diff

@ -1328,8 +1328,8 @@ function psb_z_csr_csnmi(a) result(res)
res = dzero
if (a%is_dev()) call a%sync()
!$omp parallel do private(i,acc) reduction(max: res)
do i = 1, a%get_nrows()
!$omp parallel do private(i,j,acc) reduction(max: res)
do i = 1, a%get_nrows()
acc = dzero
do j=a%irp(i),a%irp(i+1)-1
acc = acc + abs(a%val(j))
@ -1562,8 +1562,12 @@ subroutine psb_z_csr_get_diag(a,d,info)
if (a%is_unit()) then
d(1:mnm) = zone
!$omp parallel do private(i)
do i=1, mnm
d(i) = zone
end do
else
!$omp parallel do private(i,j,k)
do i=1, mnm
d(i) = zzero
do k=a%irp(i),a%irp(i+1)-1
@ -1574,6 +1578,7 @@ subroutine psb_z_csr_get_diag(a,d,info)
enddo
end do
end if
!$omp parallel do private(i)
do i=mnm+1,size(d)
d(i) = zzero
end do
@ -1629,6 +1634,7 @@ subroutine psb_z_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1, m
do j = a%irp(i), a%irp(i+1) -1
a%val(j) = a%val(j) * d(i)
@ -1643,6 +1649,7 @@ subroutine psb_z_csr_scal(d,a,info,side)
goto 9999
end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros()
j = a%ja(i)
a%val(i) = a%val(i) * d(j)
@ -1681,6 +1688,7 @@ subroutine psb_z_csr_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -2851,6 +2859,13 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='z_cp_csr_from_coo'
logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -2871,7 +2886,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
call move_alloc(tmp%ia,itemp)
call move_alloc(tmp%ja,a%ja)
call move_alloc(tmp%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call tmp%free()
else
@ -2889,22 +2904,97 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
call psb_safe_ab_cpy(b%ia,itemp,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
if (info == psb_success_) call psb_realloc(nr+1,a%irp,info)
if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)
endif
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$ maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
@ -3020,6 +3110,13 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
! $ use_openmp = .true.
info = psb_success_
debug_unit = psb_get_debug_unit()
@ -3040,22 +3137,93 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja)
call move_alloc(b%val,a%val)
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free()
a%irp(:) = 0
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ if (use_openmp) then
!!$ !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i)
!!$ do k=1,nza
!!$ i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1
!!$ end do
!!$ !$OMP END DO
!!$
!!$ !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0
!!$ !$ sum(1) = 1
!!$ !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1
!!$ !$ first_idx = ithread*work + 1
!!$ !$ else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if
!!$
!!$ !$ last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0
!!$ !$ do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i)
!!$ !$ end do
!!$ !$ if (work > 0) then
!!$ !$ sum(ithread+2) = s
!!$ !$ end if
!!$
!!$ !$OMP BARRIER
!!$
!!$ !$OMP SINGLE
!!$ !$ do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1)
!!$ !$ end do
!!$ !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx)
!!$ !$ end if
!!$ !$ if (ithread == 0) then
!!$ !$ a%irp(1) = 1
!!$ !$ end if
!!$
!!$ !$ if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val
!!$ !$ end do
!!$
!!$ !$OMP END PARALLEL
!!$ else
do k=1,nza
i = itemp(k)
a%irp(i) = a%irp(i) + 1
end do
ip = 1
do i=1,nr
ncl = a%irp(i)
a%irp(i) = ip
ip = ip + ncl
end do
a%irp(nr+1) = ip
!!$ end if
call a%set_host()
end subroutine psb_z_mv_csr_from_coo

@ -244,36 +244,38 @@ subroutine psb_cd_inloc(v, ctxt, desc, info, globalcheck,idx,usehash)
call psb_errpush(info,name,l_err=l_err)
goto 9999
end if
! Sort, eliminate duplicates, then
! scramble back into original position.
ix(1) = -1
if (present(idx)) then
if (size(idx) >= loc_row) then
if (check_) then
! Sort, eliminate duplicates, then
! scramble back into original position.
ix(1) = -1
if (present(idx)) then
if (size(idx) >= loc_row) then
!$omp parallel do private(i)
do i=1, loc_row
ix(i) = idx(i)
end do
end if
end if
if (ix(1) == -1) then
!$omp parallel do private(i)
do i=1, loc_row
ix(i) = idx(i)
ix(i) = i
end do
end if
end if
if (ix(1) == -1) then
do i=1, loc_row
ix(i) = i
call psb_msort(vl,ix,flag=psb_sort_keep_idx_)
nlu = min(1,loc_row)
do i=2,loc_row
if (vl(i) /= vl(nlu)) then
nlu = nlu + 1
vl(nlu) = vl(i)
ix(nlu) = ix(i)
end if
end do
end if
call psb_msort(vl,ix,flag=psb_sort_keep_idx_)
nlu = min(1,loc_row)
do i=2,loc_row
if (vl(i) /= vl(nlu)) then
nlu = nlu + 1
vl(nlu) = vl(i)
ix(nlu) = ix(i)
end if
end do
call psb_msort(ix(1:nlu),vl(1:nlu),flag=psb_sort_keep_idx_)
if (debug_size) &
& write(debug_unit,*) me,' ',trim(name),': After sort ',nlu
call psb_msort(ix(1:nlu),vl(1:nlu),flag=psb_sort_keep_idx_)
if (debug_size) &
& write(debug_unit,*) me,' ',trim(name),': After sort ',nlu
end if
call psb_nullify_desc(desc)
if (do_timings) then
call psb_barrier(ctxt)

@ -1,3 +1,39 @@
!
! Parallel Sparse BLAS version 3.5
! (C) Copyright 2006-2018
! Salvatore Filippone
! Alfredo Buttari
!
! Redistribution and use in source and binary forms, with or without
! modification, are permitted provided that the following conditions
! are met:
! 1. Redistributions of source code must retain the above copyright
! notice, this list of conditions and the following disclaimer.
! 2. Redistributions in binary form must reproduce the above copyright
! notice, this list of conditions, and the following disclaimer in the
! documentation and/or other materials provided with the distribution.
! 3. The name of the PSBLAS group or the names of its contributors may
! not be used to endorse or promote products derived from this
! software without specific written permission.
!
! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_cdall.f90
!
! Subroutine: psb_cdall
! Allocate descriptor Outer routine
!
subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
& vg,vl,flag,nl,repl,globalcheck,lidx,usehash)
use psb_desc_mod
@ -62,14 +98,15 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
logical :: usehash_
integer(psb_ipk_), allocatable :: itmpv(:)
integer(psb_lpk_), allocatable :: lvl(:)
logical, parameter :: timings=.false.
real(psb_dpk_) :: t0, t1
if (psb_get_errstatus() /= 0) return
info=psb_success_
name = 'psb_cdall'
call psb_erractionsave(err_act)
if (timings) t0 = psb_wtime()
call psb_info(ctxt, me, np)
if (count((/ present(vg),present(vl),&
& present(parts),present(nl), present(repl) /)) < 1) then
@ -189,7 +226,11 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
endif
if (info /= psb_success_) goto 9999
if (timings) then
t1 = psb_wtime()
write(0,*) name,' 1st phase:',t1-t0
t0 = psb_wtime()
end if
! Finish off
lr = desc%indxmap%get_lr()
call psb_realloc(max(1,lr/2),desc%halo_index, info)
@ -203,6 +244,11 @@ subroutine psb_cdall(ctxt, desc, info,mg,ng,parts,&
desc%ext_index(:) = -1
call psb_cd_set_bld(desc,info)
if (info /= psb_success_) goto 9999
if (timings) then
t1 = psb_wtime()
write(0,*) name,' 2nd phase:',t1-t0
t0 = psb_wtime()
end if
9998 continue
call psb_erractionrestore(err_act)

@ -151,6 +151,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag)
itmpov = 0
temp_ovrlap(:) = -1
!$omp parallel do private(i) reduction(+:counter)
do i=1,m
if (((v(i)-flag_) > np-1).or.((v(i)-flag_) < 0)) then
@ -158,7 +159,7 @@ subroutine psb_cdalv(v, ctxt, desc, info, flag)
l_err(1)=3
l_err(2)=v(i) - flag_
l_err(3)=i
exit
!exit
end if
if ((v(i)-flag_) == me) then

@ -239,7 +239,7 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_c_base_sparse_mat), intent(in), optional :: amold
class(psb_c_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i
integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='c_diag_precbld'
call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info)
if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999
end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=cone)
call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow
if (prec%d(i) == dzero) then
prec%d(i) = cone
@ -263,6 +265,10 @@ subroutine psb_c_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = cone/prec%d(i)
endif
end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = cone
end do
allocate(prec%dv,stat=info)
if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_d_base_sparse_mat), intent(in), optional :: amold
class(psb_d_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i
integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='d_diag_precbld'
call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info)
if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999
end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=done)
call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow
if (prec%d(i) == dzero) then
prec%d(i) = done
@ -263,6 +265,10 @@ subroutine psb_d_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = done/prec%d(i)
endif
end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = done
end do
allocate(prec%dv,stat=info)
if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_s_base_sparse_mat), intent(in), optional :: amold
class(psb_s_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i
integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='s_diag_precbld'
call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info)
if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999
end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=sone)
call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow
if (prec%d(i) == dzero) then
prec%d(i) = sone
@ -263,6 +265,10 @@ subroutine psb_s_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = sone/prec%d(i)
endif
end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = sone
end do
allocate(prec%dv,stat=info)
if (info == 0) then

@ -239,7 +239,7 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
class(psb_z_base_sparse_mat), intent(in), optional :: amold
class(psb_z_base_vect_type), intent(in), optional :: vmold
class(psb_i_base_vect_type), intent(in), optional :: imold
integer(psb_ipk_) :: err_act, nrow,i
integer(psb_ipk_) :: err_act, nrow,ncol,i
character(len=20) :: name='z_diag_precbld'
call psb_erractionsave(err_act)
@ -247,6 +247,7 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
info = psb_success_
call prec%set_ctxt(desc_a%get_ctxt())
nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols()
prec%d=a%get_diag(info)
if (info /= psb_success_) then
@ -255,7 +256,8 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
goto 9999
end if
call psb_realloc(desc_a%get_local_cols(),prec%d,info,pad=zone)
call psb_realloc(ncol,prec%d,info)
!$omp parallel do private(i)
do i=1,nrow
if (prec%d(i) == dzero) then
prec%d(i) = zone
@ -263,6 +265,10 @@ subroutine psb_z_diag_precbld(a,desc_a,prec,info,amold,vmold,imold)
prec%d(i) = zone/prec%d(i)
endif
end do
!$omp parallel do private(i)
do i=nrow+1,ncol
prec%d(i) = zone
end do
allocate(prec%dv,stat=info)
if (info == 0) then

@ -94,7 +94,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine idx2ijk3d(i,j,k,idx,nx,ny,nz,base)
pure subroutine idx2ijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: i,j,k
@ -111,7 +111,7 @@ contains
end subroutine idx2ijk3d
subroutine idx2ijk2d(i,j,idx,nx,ny,base)
pure subroutine idx2ijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: i,j
@ -139,7 +139,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine idx2ijkv(coords,idx,dims,base)
pure subroutine idx2ijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: coords(:)
@ -156,7 +156,7 @@ contains
idx_ = idx - base_
if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0
return
end if
@ -181,7 +181,7 @@ contains
end subroutine idx2ijkv
subroutine lidx2ijk3d(i,j,k,idx,nx,ny,nz,base)
pure subroutine lidx2ijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: i,j,k
@ -199,7 +199,7 @@ contains
end subroutine lidx2ijk3d
subroutine lidx2ijk2d(i,j,idx,nx,ny,base)
pure subroutine lidx2ijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: i,j
@ -228,7 +228,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine lidx2ijkv(coords,idx,dims,base)
pure subroutine lidx2ijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: coords(:)
@ -247,7 +247,7 @@ contains
idx_ = idx - base_
if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0
return
end if
@ -272,7 +272,7 @@ contains
end subroutine lidx2ijkv
subroutine lidx2lijk3d(i,j,k,idx,nx,ny,nz,base)
pure subroutine lidx2lijk3d(i,j,k,idx,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: i,j,k
@ -290,7 +290,7 @@ contains
end subroutine lidx2lijk3d
subroutine lidx2lijk2d(i,j,idx,nx,ny,base)
pure subroutine lidx2lijk2d(i,j,idx,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: i,j
@ -319,7 +319,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine lidx2lijkv(coords,idx,dims,base)
pure subroutine lidx2lijkv(coords,idx,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: coords(:)
@ -338,7 +338,7 @@ contains
idx_ = idx - base_
if (size(coords) < size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
coords = 0
return
end if
@ -374,7 +374,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine ijk2idxv(idx,coords,dims,base)
pure subroutine ijk2idxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(in) :: coords(:),dims(:)
@ -389,7 +389,7 @@ contains
end if
sz = size(coords)
if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0
return
end if
@ -420,7 +420,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine ijk2idx3d(idx,i,j,k,nx,ny,nz,base)
pure subroutine ijk2idx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: idx
@ -431,7 +431,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine ijk2idx3d
subroutine ijk2idx2d(idx,i,j,nx,ny,base)
pure subroutine ijk2idx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(out) :: idx
@ -442,7 +442,7 @@ contains
call ijk2idx(idx,[i,j],[nx,ny],base)
end subroutine ijk2idx2d
subroutine ijk2lidxv(idx,coords,dims,base)
pure subroutine ijk2lidxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_mpk_), intent(in) :: coords(:),dims(:)
@ -457,7 +457,7 @@ contains
end if
sz = size(coords)
if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0
return
end if
@ -489,7 +489,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine ijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
pure subroutine ijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: idx
@ -500,7 +500,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine ijk2lidx3d
subroutine ijk2lidx2d(idx,i,j,nx,ny,base)
pure subroutine ijk2lidx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: idx
@ -512,7 +512,7 @@ contains
end subroutine ijk2lidx2d
subroutine lijk2lidxv(idx,coords,dims,base)
pure subroutine lijk2lidxv(idx,coords,dims,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(in) :: coords(:),dims(:)
@ -527,7 +527,7 @@ contains
end if
sz = size(coords)
if (sz /= size(dims)) then
write(0,*) 'Error: size mismatch ',size(coords),size(dims)
!write(0,*) 'Error: size mismatch ',size(coords),size(dims)
idx = 0
return
end if
@ -559,7 +559,7 @@ contains
! do k=1,nz
! ijk2idx(i,j,k) = idx
! idx = idx + 1
subroutine lijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
pure subroutine lijk2lidx3d(idx,i,j,k,nx,ny,nz,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: idx
@ -570,7 +570,7 @@ contains
call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
end subroutine lijk2lidx3d
subroutine lijk2lidx2d(idx,i,j,nx,ny,base)
pure subroutine lijk2lidx2d(idx,i,j,nx,ny,base)
use psb_base_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
implicit none
integer(psb_epk_), intent(out) :: idx

Loading…
Cancel
Save