Merge branch 'repackage' into repack-ovrlp

repack-ovrlp
sfilippone 8 months ago
commit f159c7fd4d

@ -215,7 +215,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -229,9 +231,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%min_glob_row + idx(i) - 1
@ -243,7 +247,7 @@ contains
info = -1
end if
end do
!$omp end parallel do
end if
end subroutine block_ll2gv1
@ -277,7 +281,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,owned_,info) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -291,9 +297,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,owned_,info) &
!$omp private(i)
do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxmap%min_glob_row + idxin(i) - 1
@ -305,7 +313,7 @@ contains
info = -1
end if
end do
!$omp end parallel do
end if
if (is > im) then
@ -392,6 +400,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is
if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and. &
@ -408,7 +419,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is
if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and.&
@ -424,8 +439,8 @@ contains
end if
end if
end do
!$omp end parallel do
else
!!$ write(0,*) 'Block status: invalid ',idxmap%get_state()
idx(1:is) = -1
info = -1
end if
@ -433,6 +448,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is
if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then
@ -447,8 +465,11 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is
if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then
@ -462,6 +483,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -953,7 +975,9 @@ contains
end if
info = psb_success_
else
info = -5
write(0,*) 'From has_search_ins:',info,ip,lip,nxt,&
& idxmap%min_glob_row,idxmap%max_glob_row
info = -6
return
end if
idxout(i) = lip + idxmap%local_rows
@ -1131,7 +1155,7 @@ contains
idxmap%global_cols = ntot
idxmap%local_rows = nl
idxmap%local_cols = nl
idxmap%ctxt = ctxt
idxmap%ctxt = ctxt
idxmap%state = psb_desc_bld_
idxmap%mpic = psb_get_mpi_comm(ctxt)
idxmap%min_glob_row = vnl(iam)+1

@ -221,6 +221,9 @@ contains
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -233,9 +236,12 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%loc_to_glob(idx(i))
@ -246,7 +252,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end subroutine hash_l2gv1
@ -363,6 +369,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
if (mask(i)) then
ip = idx(i)
@ -388,7 +397,7 @@ contains
endif
end if
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1
@ -404,6 +413,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
@ -427,14 +439,12 @@ contains
idx(i) = lip
endif
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1
info = -1
end if
end if
end subroutine hash_g2lv1
@ -493,6 +503,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
if (mask(i)) then
ip = idxin(i)
@ -518,6 +531,7 @@ contains
endif
end if
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1
@ -533,6 +547,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
ip = idxin(i)
if ((ip < 1 ).or.(ip>mglob)) then
@ -556,14 +573,12 @@ contains
idxout(i) = lip
endif
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1
info = -1
end if
end if
end subroutine hash_g2lv2
@ -649,7 +664,7 @@ contains
& err_act
integer(psb_lpk_) :: mglob, ip, nxt, tlip
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np
integer(psb_ipk_) :: me, np,ith
character(len=20) :: name,ch_err
logical, allocatable :: mask_(:)
!!$ logical :: use_openmp = .true.
@ -683,363 +698,243 @@ contains
mglob = idxmap%get_gr()
nrow = idxmap%get_lr()
!write(0,*) me,name,' before loop ',psb_errstatus_fatal()
#ifdef OPENMP
!call OMP_init_lock(ins_lck)
#if defined(OPENMP)
isLoopValid = .true.
if (idxmap%is_bld()) then
isLoopValid = .true.
ncol = idxmap%get_lc()
if (present(mask)) then
mask_ = mask
else
allocate(mask_(size(idx)))
mask_ = .true.
end if
if (present(lidx)) then
if (present(mask)) then
!$omp critical(hash_g2l_ins)
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
ncol = idxmap%get_lc()
nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 1'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else
idx(i) = -1
call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(max(ncol,nxt))
endif
idx(i) = lip
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
end if
!call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
endif
!$omp end critical(hash_g2l_ins)
end if
else
idx(i) = -1
end if
enddo
!$omp end parallel do
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
else
!$omp critical(hash_g2l_ins)
else if (.not.present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
ncol = idxmap%get_lc()
nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 2'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else
idx(i) = -1
call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(max(ncol,nxt))
endif
idx(i) = lip
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
end if
!call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
endif
!$omp end critical(hash_g2l_ins)
end if
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
enddo
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if(present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then
ip = idx(i)
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
!write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal()
ncol = idxmap%get_lc()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal()
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing, so this lock is relatively rare).
ncol = idxmap%get_lc()
nxt = ncol + 1
!write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal()
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc()
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
!write(0,*) me,name,' b hsik ',psb_errstatus_fatal()
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num()
!write(0,*) me,name,' a hsik ',psb_errstatus_fatal()
lip = tlip
if (info >= 0) then
!write(0,*) 'Error before spot 3', info
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
else
call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num()
if (info /= psb_success_) then
!write(0,*) 'Error spot 3', info
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
idx(i) = lip
info = psb_success_
else
idx(i) = -1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
!call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
!$omp end critical(hash_g2l_ins)
end if
else
idx(i) = -1
end if
enddo
!$omp end parallel do
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
else if (.not.present(mask)) then
if (.not. isLoopValid) then
goto 9999
end if
else
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i)
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
ncol = idxmap%get_lc()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
ncol = idxmap%get_lc()
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc()
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
else
call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 4'
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
idx(i) = lip
info = psb_success_
else
idx(i) = -1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
!call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
!$omp end critical(hash_g2l_ins)
end if
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
enddo
!$omp end parallel do
end if
end if
else
@ -1047,7 +942,7 @@ contains
idx = -1
info = -1
end if
!call OMP_destroy_lock(ins_lck)
if (.not. isLoopValid) goto 9999
#else
!!$ else if (.not.use_openmp) then
isLoopValid = .true.
@ -1066,13 +961,13 @@ contains
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then
if (nxt == tlip) then
ncol = max(ncol,nxt)
@ -1747,6 +1642,9 @@ contains
! for a width of psb_hash_bits
!
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
if (mask(i)) then
key = x(i)
@ -1784,7 +1682,11 @@ contains
end if
end if
end do
!$omp end parallel do
else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
key = x(i)
ih = iand(key,hashmask)
@ -1820,6 +1722,7 @@ contains
x(i) = tmp
end if
end do
!$omp end parallel do
end if
end subroutine hash_inner_cnv1
@ -1842,6 +1745,9 @@ contains
! for a width of psb_hash_bits
!
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
if (mask(i)) then
key = x(i)
@ -1882,9 +1788,12 @@ contains
end if
end if
end do
!$omp end parallel do
else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
key = x(i)
ih = iand(key,hashmask)
@ -1923,6 +1832,7 @@ contains
y(i) = tmp
end if
end do
!$omp end parallel do
end if
end subroutine hash_inner_cnv2

@ -383,12 +383,12 @@ contains
integer(psb_lpk_), intent(out) :: val
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: hsize,hmask, hk, hd
integer(psb_ipk_) :: hsize,hmask, hk, hd, i
logical :: redo
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
val = -1
hk = iand(psb_hashval(key),hmask)
if (hk == 0) then
hd = 1
@ -400,56 +400,57 @@ contains
info = HashOutOfMemory
return
end if
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1
!$omp end atomic
do
!$omp atomic
hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
!write(0,*) 'In searchinskey 1 : ', info, HashDuplicate
return
end if
redo = .false.
!$omp critical(hashsearchins)
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
! Note: because of the way we allocate things at CDALL
! time this is really unlikely; if we get here, we
! have at least as many halo indices as internals, which
! means we're already in trouble. But we try to keep going.
!
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
!return
end if
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
! Note: because of the way we allocate things at CDALL
! time this is really unlikely; if we get here, we
! have at least as many halo indices as internals, which
! means we're already in trouble. But we try to keep going.
!
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
redo = .true.
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
info = HashOk
end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if
!$omp end critical(hashsearchins)
if (info /= HashOk) then
write(0,*) 'In searchinskey 2: ', info
if (redo) then
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
end if
if (val > 0) return
if (val > 0) exit
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do
!write(0,*) 'In searchinskey 3: ', info
end subroutine psb_hash_lsearchinskey
recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info)
@ -459,6 +460,7 @@ contains
integer(psb_ipk_) :: hsize,hmask, hk, hd
logical :: redo
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
@ -475,16 +477,21 @@ contains
return
end if
val = -1
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1
!$omp end atomic
do
!$omp atomic
hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
return
end if
redo = .false.
!$OMP CRITICAL
!$omp critical(hashsearchins)
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
@ -496,24 +503,28 @@ contains
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
redo = .true.
!!$ call psb_hash_searchinskey(key,val,nextval,hash,info)
!!$ return
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
info = HashOk
end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if
!$OMP END CRITICAL
if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info)
if (info /= HashOk) return
if (val > 0) return
!$omp end critical(hashsearchins)
if (redo) then
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
end if
if (val > 0) exit
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do
@ -551,7 +562,7 @@ contains
end if
if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry
! !$ info = HashNotFound
info = HashNotFound
return
end if
hk = hk - hd
@ -591,7 +602,7 @@ contains
end if
if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry
! !$ info = HashNotFound
info = HashNotFound
return
end if
hk = hk - hd

@ -158,6 +158,7 @@ module psb_indx_map_mod
procedure, pass(idxmap) :: set_lr => base_set_lr
procedure, pass(idxmap) :: set_lc => base_set_lc
procedure, pass(idxmap) :: inc_lc => base_inc_lc
procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy
procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy
@ -235,7 +236,7 @@ module psb_indx_map_mod
& base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,&
& base_get_mpic, base_sizeof, base_set_null, &
& base_set_grl, base_set_gcl, &
& base_set_lr, base_set_lc, base_set_ctxt,&
& base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,&
& base_set_mpic, base_get_fmt, base_asb, base_free,&
& base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,&
& base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,&
@ -573,6 +574,14 @@ contains
idxmap%local_cols = val
end subroutine base_set_lc
subroutine base_inc_lc(idxmap)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
!$omp atomic
idxmap%local_cols = idxmap%local_cols + 1
!$omp end atomic
end subroutine base_inc_lc
subroutine base_set_p_adjcncy(idxmap,val)
use psb_realloc_mod
use psb_sort_mod

@ -179,6 +179,9 @@ contains
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
@ -191,9 +194,12 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
idx(i) = idxmap%loc_to_glob(idx(i))
@ -204,6 +210,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
@ -298,6 +305,9 @@ contains
if (present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is
if (mask(i)) then
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -309,6 +319,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -317,6 +328,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
@ -326,6 +340,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -365,6 +380,9 @@ contains
if (present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is
if (mask(i)) then
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -376,6 +394,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idxout(1:is) = -1
info = -1
@ -384,6 +403,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
@ -393,6 +415,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else
idxout(1:is) = -1
info = -1
@ -541,6 +564,10 @@ contains
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,laddsz,lidx) &
!$omp private(i,ix,info)
! $ o m p reduction(.AND.:isLoopValid)
do i=1, is
if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -579,8 +606,8 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then

@ -169,7 +169,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -179,9 +181,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
! do nothing
@ -189,7 +193,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end subroutine repl_l2gv1
@ -223,7 +227,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -233,9 +239,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxin(i)
@ -243,7 +251,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
if (is > im) info = -3
@ -324,6 +332,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -333,7 +344,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1,is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -344,6 +359,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -352,6 +368,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -359,7 +378,11 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1,is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -367,6 +390,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -409,6 +433,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -418,7 +445,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1,im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -428,6 +459,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idxout(1:im) = -1
info = -1
@ -436,6 +468,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -443,7 +478,11 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1,im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -451,6 +490,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else
idxout(1:im) = -1
info = -1
@ -557,6 +597,9 @@ contains
else if (idxmap%is_valid()) then
if (present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -566,9 +609,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -576,9 +621,13 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,is,idxmap) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -588,8 +637,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,is,idxmap) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -597,6 +649,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end if
else
@ -644,6 +697,9 @@ contains
else if (idxmap%is_valid()) then
if (present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -653,9 +709,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -663,9 +721,13 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -675,8 +737,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -684,6 +749,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
end if
else

@ -2051,8 +2051,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_c_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_d_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_s_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_z_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier
!$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

Loading…
Cancel
Save