Modified matrix build procedures with OpenMP

omp-threadsafe
sfilippone 2 years ago
parent eb11e5e053
commit 8459ea28f5

@ -2818,6 +2818,9 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_realloc_mod use psb_realloc_mod
use psb_sort_mod use psb_sort_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_csput_a use psb_c_base_mat_mod, psb_protect_name => psb_c_coo_csput_a
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_c_coo_sparse_mat), intent(inout) :: a class(psb_c_coo_sparse_mat), intent(inout) :: a
@ -2829,7 +2832,7 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='c_coo_csput_a_impl' character(len=20) :: name='c_coo_csput_a_impl'
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2861,10 +2864,11 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_bld()) then if (a%is_bld()) then
!$omp critical
nza = a%get_nzeros()
isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
call a%reallocate(max(nza+nz,int(1.5*isza))) call a%reallocate(max(nza+nz,int(1.5*isza)))
@ -2872,16 +2876,23 @@ subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
isza = a%get_size() isza = a%get_size()
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
goto 9999 else
nzaold = nza
nza = nza + nz
call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical
call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
call a%set_nzeros(nza)
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
@ -2951,7 +2962,7 @@ contains
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz !nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)

@ -2998,21 +2998,20 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_c_cp_csr_from_coo end subroutine psb_c_cp_csr_from_coo
@ -3128,7 +3127,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: sum(:)
@ -3229,7 +3227,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1

@ -2832,7 +2832,7 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='d_coo_csput_a_impl' character(len=20) :: name='d_coo_csput_a_impl'
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit, nzaold integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2864,7 +2864,6 @@ subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
if (a%is_bld()) then if (a%is_bld()) then
!$omp critical !$omp critical
@ -2945,9 +2944,9 @@ contains
! the serial version: each element is stored in data ! the serial version: each element is stored in data
! structures but the invalid ones are stored as '-1' values. ! structures but the invalid ones are stored as '-1' values.
! These values will be filtered in a future fixing process. ! These values will be filtered in a future fixing process.
! $ OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
! $ OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) & !$OMP shared(nz,imin,imax,jmin,jmax,ia,ja,val,ia1,ia2,aspk,nza) &
! $ OMP private(ir,ic,i) !$OMP private(ir,ic,i)
do i=1,nz do i=1,nz
ir = ia(i) ir = ia(i)
ic = ja(i) ic = ja(i)
@ -2961,7 +2960,7 @@ contains
aspk(nza+i) = -1 aspk(nza+i) = -1
end if end if
end do end do
! $OMP END PARALLEL DO !$OMP END PARALLEL DO
!nza = nza + nz !nza = nza + nz
#else #else

@ -2998,21 +2998,20 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_d_cp_csr_from_coo end subroutine psb_d_cp_csr_from_coo
@ -3128,7 +3127,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: sum(:)
@ -3229,7 +3227,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1

@ -2818,6 +2818,9 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_realloc_mod use psb_realloc_mod
use psb_sort_mod use psb_sort_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_csput_a use psb_s_base_mat_mod, psb_protect_name => psb_s_coo_csput_a
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_s_coo_sparse_mat), intent(inout) :: a class(psb_s_coo_sparse_mat), intent(inout) :: a
@ -2829,7 +2832,7 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='s_coo_csput_a_impl' character(len=20) :: name='s_coo_csput_a_impl'
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2861,10 +2864,11 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_bld()) then if (a%is_bld()) then
!$omp critical
nza = a%get_nzeros()
isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
call a%reallocate(max(nza+nz,int(1.5*isza))) call a%reallocate(max(nza+nz,int(1.5*isza)))
@ -2872,16 +2876,23 @@ subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
isza = a%get_size() isza = a%get_size()
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
goto 9999 else
nzaold = nza
nza = nza + nz
call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical
call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
call a%set_nzeros(nza)
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
@ -2951,7 +2962,7 @@ contains
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz !nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)

@ -2998,21 +2998,20 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_s_cp_csr_from_coo end subroutine psb_s_cp_csr_from_coo
@ -3128,7 +3127,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: sum(:)
@ -3229,7 +3227,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1

@ -2818,6 +2818,9 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_realloc_mod use psb_realloc_mod
use psb_sort_mod use psb_sort_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_csput_a use psb_z_base_mat_mod, psb_protect_name => psb_z_coo_csput_a
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_z_coo_sparse_mat), intent(inout) :: a class(psb_z_coo_sparse_mat), intent(inout) :: a
@ -2829,7 +2832,7 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='z_coo_csput_a_impl' character(len=20) :: name='z_coo_csput_a_impl'
logical, parameter :: debug=.false. logical, parameter :: debug=.false.
integer(psb_ipk_) :: nza, i,j,k, nzl, isza, debug_level, debug_unit integer(psb_ipk_) :: nza, i,j,k, nzl, isza, nzaold, debug_level, debug_unit
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2861,10 +2864,11 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz == 0) return if (nz == 0) return
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_bld()) then if (a%is_bld()) then
!$omp critical
nza = a%get_nzeros()
isza = a%get_size()
! Build phase. Must handle reallocations in a sensible way. ! Build phase. Must handle reallocations in a sensible way.
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
call a%reallocate(max(nza+nz,int(1.5*isza))) call a%reallocate(max(nza+nz,int(1.5*isza)))
@ -2872,16 +2876,23 @@ subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
isza = a%get_size() isza = a%get_size()
if (isza < (nza+nz)) then if (isza < (nza+nz)) then
info = psb_err_alloc_dealloc_; call psb_errpush(info,name) info = psb_err_alloc_dealloc_; call psb_errpush(info,name)
goto 9999 else
nzaold = nza
nza = nza + nz
call a%set_nzeros(nza)
#if defined(OPENMP)
!write(0,*) 'From thread ',omp_get_thread_num(),nzaold,nz,nza,a%get_nzeros()
#endif
end if end if
!$omp end critical
call psb_inner_ins(nz,ia,ja,val,nza,a%ia,a%ja,a%val,isza,& if (info /= 0) goto 9999
call psb_inner_ins(nz,ia,ja,val,nzaold,a%ia,a%ja,a%val,isza,&
& imin,imax,jmin,jmax,info) & imin,imax,jmin,jmax,info)
call a%set_nzeros(nza)
call a%set_sorted(.false.) call a%set_sorted(.false.)
else if (a%is_upd()) then else if (a%is_upd()) then
nza = a%get_nzeros()
isza = a%get_size()
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
@ -2951,7 +2962,7 @@ contains
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
nza = nza + nz !nza = nza + nz
#else #else
do i=1, nz do i=1, nz
ir = ia(i) ir = ia(i)

@ -2998,21 +2998,20 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_z_cp_csr_from_coo end subroutine psb_z_cp_csr_from_coo
@ -3128,7 +3127,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: sum(:)
@ -3229,7 +3227,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1

@ -51,6 +51,9 @@
subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_cspins use psb_base_mod, psb_protect_name => psb_cspins
use psi_mod use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
!....parameters... !....parameters...
@ -70,7 +73,7 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
integer(psb_ipk_), parameter :: relocsz=200 integer(psb_ipk_), parameter :: relocsz=200
logical :: rebuild_, local_ logical :: rebuild_, local_
integer(psb_ipk_), allocatable :: ila(:),jla(:) integer(psb_ipk_), allocatable :: ila(:),jla(:)
integer(psb_ipk_) :: i,k integer(psb_ipk_) :: i,k, ith, nth
integer(psb_lpk_) :: nnl integer(psb_lpk_) :: nnl
integer(psb_lpk_), allocatable :: lila(:),ljla(:) integer(psb_lpk_), allocatable :: lila(:),ljla(:)
complex(psb_spk_), allocatable :: lval(:) complex(psb_spk_), allocatable :: lval(:)
@ -82,7 +85,13 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
ctxt = desc_a%get_context() ctxt = desc_a%get_context()
call psb_info(ctxt, me, np) call psb_info(ctxt, me, np)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
if (nz < 0) then if (nz < 0) then
info = 1111 info = 1111
call psb_errpush(info,name) call psb_errpush(info,name)
@ -131,15 +140,23 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/)) & a_err='allocate',i_err=(/info/))
goto 9999 goto 9999
end if end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol)
#endif
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
#if defined(OPENMP)
!$omp critical(cSPINS)
#endif
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
#if defined(OPENMP)
!$omp end critical(cSPINS)
#endif
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/)) & a_err='psb_cdins',i_err=(/info/))
goto 9999 !goto 9999
end if end if
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols() ncol = desc_a%get_local_cols()
@ -149,13 +166,12 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput') call psb_errpush(info,name,a_err='a%csput')
goto 9999 !goto 9999
end if end if
if (a%is_remote_build()) then if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0) nnl = count(ila(1:nz)<0)
if (nnl > 0) then if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl)) allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0 k = 0
do i=1,nz do i=1,nz
@ -175,8 +191,13 @@ subroutine psb_cspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
else else
info = psb_err_invalid_a_and_cd_state_ info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 !goto 9999
end if end if
#if defined(OPENMP)
!$omp end parallel
#endif
if (info /= 0) goto 9999
endif endif
else if (desc_a%is_asb()) then else if (desc_a%is_asb()) then

@ -73,7 +73,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
integer(psb_ipk_), parameter :: relocsz=200 integer(psb_ipk_), parameter :: relocsz=200
logical :: rebuild_, local_ logical :: rebuild_, local_
integer(psb_ipk_), allocatable :: ila(:),jla(:) integer(psb_ipk_), allocatable :: ila(:),jla(:)
integer(psb_ipk_) :: i,k integer(psb_ipk_) :: i,k, ith, nth
integer(psb_lpk_) :: nnl integer(psb_lpk_) :: nnl
integer(psb_lpk_), allocatable :: lila(:),ljla(:) integer(psb_lpk_), allocatable :: lila(:),ljla(:)
real(psb_dpk_), allocatable :: lval(:) real(psb_dpk_), allocatable :: lval(:)
@ -86,7 +86,11 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
ctxt = desc_a%get_context() ctxt = desc_a%get_context()
call psb_info(ctxt, me, np) call psb_info(ctxt, me, np)
#if defined(OPENMP) #if defined(OPENMP)
!write(0,*) name,omp_get_num_threads(),omp_get_thread_num() nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif #endif
if (nz < 0) then if (nz < 0) then
info = 1111 info = 1111
@ -139,17 +143,14 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
#if defined(OPENMP) #if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol) !$omp parallel private(ila,jla,nrow,ncol)
#endif #endif
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
#if defined(OPENMP) #if defined(OPENMP)
!$omp critical !$omp critical(dSPINS)
#endif #endif
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
!!$ write(0,*) omp_get_thread_num(),'Check g2l: ',ila(1:nz),':',&
!!$ & jla(1:nz),':',ja(1:nz)
#if defined(OPENMP) #if defined(OPENMP)
!$omp end critical !$omp end critical(dSPINS)
#endif #endif
if (info /= psb_success_) then if (info /= psb_success_) then
@ -171,7 +172,6 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (a%is_remote_build()) then if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0) nnl = count(ila(1:nz)<0)
if (nnl > 0) then if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl)) allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0 k = 0
do i=1,nz do i=1,nz
@ -197,7 +197,7 @@ subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
#if defined(OPENMP) #if defined(OPENMP)
!$omp end parallel !$omp end parallel
#endif #endif
if (info /= 0) goto 9999
endif endif
else if (desc_a%is_asb()) then else if (desc_a%is_asb()) then

@ -51,6 +51,9 @@
subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_sspins use psb_base_mod, psb_protect_name => psb_sspins
use psi_mod use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
!....parameters... !....parameters...
@ -70,7 +73,7 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
integer(psb_ipk_), parameter :: relocsz=200 integer(psb_ipk_), parameter :: relocsz=200
logical :: rebuild_, local_ logical :: rebuild_, local_
integer(psb_ipk_), allocatable :: ila(:),jla(:) integer(psb_ipk_), allocatable :: ila(:),jla(:)
integer(psb_ipk_) :: i,k integer(psb_ipk_) :: i,k, ith, nth
integer(psb_lpk_) :: nnl integer(psb_lpk_) :: nnl
integer(psb_lpk_), allocatable :: lila(:),ljla(:) integer(psb_lpk_), allocatable :: lila(:),ljla(:)
real(psb_spk_), allocatable :: lval(:) real(psb_spk_), allocatable :: lval(:)
@ -82,7 +85,13 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
ctxt = desc_a%get_context() ctxt = desc_a%get_context()
call psb_info(ctxt, me, np) call psb_info(ctxt, me, np)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
if (nz < 0) then if (nz < 0) then
info = 1111 info = 1111
call psb_errpush(info,name) call psb_errpush(info,name)
@ -131,15 +140,23 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/)) & a_err='allocate',i_err=(/info/))
goto 9999 goto 9999
end if end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol)
#endif
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
#if defined(OPENMP)
!$omp critical(sSPINS)
#endif
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
#if defined(OPENMP)
!$omp end critical(sSPINS)
#endif
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/)) & a_err='psb_cdins',i_err=(/info/))
goto 9999 !goto 9999
end if end if
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols() ncol = desc_a%get_local_cols()
@ -149,13 +166,12 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput') call psb_errpush(info,name,a_err='a%csput')
goto 9999 !goto 9999
end if end if
if (a%is_remote_build()) then if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0) nnl = count(ila(1:nz)<0)
if (nnl > 0) then if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl)) allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0 k = 0
do i=1,nz do i=1,nz
@ -175,8 +191,13 @@ subroutine psb_sspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
else else
info = psb_err_invalid_a_and_cd_state_ info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 !goto 9999
end if end if
#if defined(OPENMP)
!$omp end parallel
#endif
if (info /= 0) goto 9999
endif endif
else if (desc_a%is_asb()) then else if (desc_a%is_asb()) then

@ -51,6 +51,9 @@
subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local) subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
use psb_base_mod, psb_protect_name => psb_zspins use psb_base_mod, psb_protect_name => psb_zspins
use psi_mod use psi_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
!....parameters... !....parameters...
@ -70,7 +73,7 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
integer(psb_ipk_), parameter :: relocsz=200 integer(psb_ipk_), parameter :: relocsz=200
logical :: rebuild_, local_ logical :: rebuild_, local_
integer(psb_ipk_), allocatable :: ila(:),jla(:) integer(psb_ipk_), allocatable :: ila(:),jla(:)
integer(psb_ipk_) :: i,k integer(psb_ipk_) :: i,k, ith, nth
integer(psb_lpk_) :: nnl integer(psb_lpk_) :: nnl
integer(psb_lpk_), allocatable :: lila(:),ljla(:) integer(psb_lpk_), allocatable :: lila(:),ljla(:)
complex(psb_dpk_), allocatable :: lval(:) complex(psb_dpk_), allocatable :: lval(:)
@ -82,7 +85,13 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
ctxt = desc_a%get_context() ctxt = desc_a%get_context()
call psb_info(ctxt, me, np) call psb_info(ctxt, me, np)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
if (nz < 0) then if (nz < 0) then
info = 1111 info = 1111
call psb_errpush(info,name) call psb_errpush(info,name)
@ -131,15 +140,23 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
& a_err='allocate',i_err=(/info/)) & a_err='allocate',i_err=(/info/))
goto 9999 goto 9999
end if end if
#if defined(OPENMP)
!$omp parallel private(ila,jla,nrow,ncol)
#endif
call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.) call desc_a%indxmap%g2l(ia(1:nz),ila(1:nz),info,owned=.true.)
#if defined(OPENMP)
!$omp critical(zSPINS)
#endif
if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,& if (info == 0) call desc_a%indxmap%g2l_ins(ja(1:nz),jla(1:nz),info,&
& mask=(ila(1:nz)>0)) & mask=(ila(1:nz)>0))
#if defined(OPENMP)
!$omp end critical(zSPINS)
#endif
if (info /= psb_success_) then if (info /= psb_success_) then
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_cdins',i_err=(/info/)) & a_err='psb_cdins',i_err=(/info/))
goto 9999 !goto 9999
end if end if
nrow = desc_a%get_local_rows() nrow = desc_a%get_local_rows()
ncol = desc_a%get_local_cols() ncol = desc_a%get_local_cols()
@ -149,13 +166,12 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
if (info /= psb_success_) then if (info /= psb_success_) then
info=psb_err_from_subroutine_ info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='a%csput') call psb_errpush(info,name,a_err='a%csput')
goto 9999 !goto 9999
end if end if
if (a%is_remote_build()) then if (a%is_remote_build()) then
nnl = count(ila(1:nz)<0) nnl = count(ila(1:nz)<0)
if (nnl > 0) then if (nnl > 0) then
!write(0,*) 'Check on insert ',nnl
allocate(lila(nnl),ljla(nnl),lval(nnl)) allocate(lila(nnl),ljla(nnl),lval(nnl))
k = 0 k = 0
do i=1,nz do i=1,nz
@ -175,8 +191,13 @@ subroutine psb_zspins(nz,ia,ja,val,a,desc_a,info,rebuild,local)
else else
info = psb_err_invalid_a_and_cd_state_ info = psb_err_invalid_a_and_cd_state_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 !goto 9999
end if end if
#if defined(OPENMP)
!$omp end parallel
#endif
if (info /= 0) goto 9999
endif endif
else if (desc_a%is_asb()) then else if (desc_a%is_asb()) then

@ -451,16 +451,24 @@ contains
call psb_barrier(ctxt) call psb_barrier(ctxt)
t1 = psb_wtime() t1 = psb_wtime()
!$omp parallel private(i,ii,ib,icoeff,glob_row,x,y,z,zt,ix,iy,iz) !$omp parallel shared(deltah,myidx,a,desc_a)
! shared(deltah,myidx,a,desc_a,nb)
! we build an auxiliary matrix consisting of one row at a ! we build an auxiliary matrix consisting of one row at a
! time; just a small matrix. might be extended to generate ! time; just a small matrix. might be extended to generate
! a bunch of rows per call. ! a bunch of rows per call.
! !
block block
integer(psb_ipk_) :: i,j,ii,ib,icoeff, ix,iy,iz, ith,nth
integer(psb_lpk_) :: glob_row
integer(psb_lpk_), allocatable :: irow(:),icol(:) integer(psb_lpk_), allocatable :: irow(:),icol(:)
real(psb_dpk_), allocatable :: val(:) real(psb_dpk_), allocatable :: val(:)
real(psb_dpk_) :: x,y,z, zt(nb)
#if defined(OPENMP)
nth = omp_get_num_threads()
ith = omp_get_thread_num()
#else
nth = 1
ith = 0
#endif
allocate(val(20*nb),irow(20*nb),& allocate(val(20*nb),irow(20*nb),&
&icol(20*nb),stat=info) &icol(20*nb),stat=info)
if (info /= psb_success_ ) then if (info /= psb_success_ ) then
@ -473,7 +481,7 @@ contains
! loop over rows belonging to current process in a block ! loop over rows belonging to current process in a block
! distribution. ! distribution.
!$omp do !$omp do schedule(dynamic,4)
! !
do ii=1, nlr, nb do ii=1, nlr, nb
if (info /= 0) cycle if (info /= 0) cycle
@ -723,7 +731,7 @@ program psb_d_pde3d
if(psb_errstatus_fatal()) goto 9999 if(psb_errstatus_fatal()) goto 9999
name='pde3d90' name='pde3d90'
call psb_set_errverbosity(itwo) call psb_set_errverbosity(itwo)
call psb_cd_set_large_threshold(2000_psb_ipk_) !call psb_cd_set_large_threshold(2000_psb_ipk_)
! !
! Hello world ! Hello world
! !

@ -799,7 +799,6 @@ program psb_d_pde3d
if(psb_errstatus_fatal()) goto 9999 if(psb_errstatus_fatal()) goto 9999
name='pde3d90' name='pde3d90'
call psb_set_errverbosity(itwo) call psb_set_errverbosity(itwo)
call psb_cd_set_large_threshold(2000_psb_ipk_)
! !
! Hello world ! Hello world
! !

Loading…
Cancel
Save