Merge branch 'repackage' into repack-ovrlp

repack-ovrlp
sfilippone 9 months ago
commit f159c7fd4d

@ -215,7 +215,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -229,9 +231,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%min_glob_row + idx(i) - 1 idx(i) = idxmap%min_glob_row + idx(i) - 1
@ -243,7 +247,7 @@ contains
info = -1 info = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine block_ll2gv1 end subroutine block_ll2gv1
@ -277,7 +281,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,owned_,info) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -291,9 +297,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,owned_,info) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxmap%min_glob_row + idxin(i) - 1 idxout(i) = idxmap%min_glob_row + idxin(i) - 1
@ -305,7 +313,7 @@ contains
info = -1 info = -1
end if end if
end do end do
!$omp end parallel do
end if end if
if (is > im) then if (is > im) then
@ -392,6 +400,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and. & if ((idxmap%min_glob_row <= idx(i)).and. &
@ -408,7 +419,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
@ -424,8 +439,8 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!!$ write(0,*) 'Block status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
end if end if
@ -433,6 +448,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is do i=1, is
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then & (idx(i) <= idxmap%max_glob_row)) then
@ -447,8 +465,11 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is do i=1,is
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then & (idx(i) <= idxmap%max_glob_row)) then
@ -462,6 +483,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -953,7 +975,9 @@ contains
end if end if
info = psb_success_ info = psb_success_
else else
info = -5 write(0,*) 'From has_search_ins:',info,ip,lip,nxt,&
& idxmap%min_glob_row,idxmap%max_glob_row
info = -6
return return
end if end if
idxout(i) = lip + idxmap%local_rows idxout(i) = lip + idxmap%local_rows
@ -1131,7 +1155,7 @@ contains
idxmap%global_cols = ntot idxmap%global_cols = ntot
idxmap%local_rows = nl idxmap%local_rows = nl
idxmap%local_cols = nl idxmap%local_cols = nl
idxmap%ctxt = ctxt idxmap%ctxt = ctxt
idxmap%state = psb_desc_bld_ idxmap%state = psb_desc_bld_
idxmap%mpic = psb_get_mpi_comm(ctxt) idxmap%mpic = psb_get_mpi_comm(ctxt)
idxmap%min_glob_row = vnl(iam)+1 idxmap%min_glob_row = vnl(iam)+1

@ -63,7 +63,7 @@ module psb_hash_map_mod
integer(psb_lpk_) :: hashvsize, hashvmask integer(psb_lpk_) :: hashvsize, hashvmask
integer(psb_ipk_), allocatable :: hashv(:) integer(psb_ipk_), allocatable :: hashv(:)
integer(psb_lpk_), allocatable :: glb_lc(:,:), loc_to_glob(:) integer(psb_lpk_), allocatable :: glb_lc(:,:), loc_to_glob(:)
type(psb_hash_type) :: hash type(psb_hash_type) :: hash
contains contains
@ -221,6 +221,9 @@ contains
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -233,9 +236,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%loc_to_glob(idx(i)) idx(i) = idxmap%loc_to_glob(idx(i))
@ -246,7 +252,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_l2gv1 end subroutine hash_l2gv1
@ -363,6 +369,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
if (mask(i)) then if (mask(i)) then
ip = idx(i) ip = idx(i)
@ -388,7 +397,7 @@ contains
endif endif
end if end if
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
@ -404,6 +413,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
@ -427,14 +439,12 @@ contains
idx(i) = lip idx(i) = lip
endif endif
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
end if end if
end if end if
end subroutine hash_g2lv1 end subroutine hash_g2lv1
@ -493,6 +503,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
if (mask(i)) then if (mask(i)) then
ip = idxin(i) ip = idxin(i)
@ -518,6 +531,7 @@ contains
endif endif
end if end if
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1 idxout(1:is) = -1
@ -533,6 +547,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
ip = idxin(i) ip = idxin(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
@ -556,14 +573,12 @@ contains
idxout(i) = lip idxout(i) = lip
endif endif
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
end if end if
end if end if
end subroutine hash_g2lv2 end subroutine hash_g2lv2
@ -649,7 +664,7 @@ contains
& err_act & err_act
integer(psb_lpk_) :: mglob, ip, nxt, tlip integer(psb_lpk_) :: mglob, ip, nxt, tlip
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np integer(psb_ipk_) :: me, np,ith
character(len=20) :: name,ch_err character(len=20) :: name,ch_err
logical, allocatable :: mask_(:) logical, allocatable :: mask_(:)
!!$ logical :: use_openmp = .true. !!$ logical :: use_openmp = .true.
@ -683,363 +698,243 @@ contains
mglob = idxmap%get_gr() mglob = idxmap%get_gr()
nrow = idxmap%get_lr() nrow = idxmap%get_lr()
!write(0,*) me,name,' before loop ',psb_errstatus_fatal() !write(0,*) me,name,' before loop ',psb_errstatus_fatal()
#ifdef OPENMP #if defined(OPENMP)
!call OMP_init_lock(ins_lck) isLoopValid = .true.
if (idxmap%is_bld()) then
if (idxmap%is_bld()) then
isLoopValid = .true.
ncol = idxmap%get_lc()
if (present(mask)) then
mask_ = mask
else
allocate(mask_(size(idx)))
mask_ = .true.
end if
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp critical(hash_g2l_ins) !$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & !$omp private(i,ip,lip,tlip,nxt,info) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) & !$omp reduction(.AND.:isLoopValid)
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0 if (mask(i)) then
if (.not. isLoopValid) cycle ip = idx(i)
if (mask(i)) then if ((ip < 1 ).or.(ip>mglob) ) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
!call OMP_set_lock(ins_lck) idx(i) = lip
info = psb_success_
! We check again if the index is already in 'idxmap', this else
! time inside a critical region (we assume that the index !$omp critical(hash_g2l_ins)
! is often already existing). tlip = lip
ncol = idxmap%get_lc() nxt = lidx(i)
nxt = lidx(i) if (nxt <= nrow) then
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& idx(i) = -1
& idxmap%hashv,idxmap%glb_lc,ncol) else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
if (lip > 0) then & idxmap%glb_lc,ncol)
idx(i) = lip if (lip > 0) then
else if (lip < 0) then idx(i) = lip
! Index not found
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 1'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
else else
idx(i) = -1 call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(max(ncol,nxt))
endif
idx(i) = lip
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
end if end if
!call OMP_unset_lock(ins_lck) endif
end if !$omp end critical(hash_g2l_ins)
else
idx(i) = lip
end if end if
else else
idx(i) = -1 idx(i) = -1
end if end if
enddo
!$omp end parallel do
end do else if (.not.present(mask)) then
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
else
!$omp critical(hash_g2l_ins)
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & !$omp parallel do default(none) schedule(dynamic) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) & !$omp private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid) !$omp reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0 ip = idx(i)
if (.not. isLoopValid) cycle if ((ip < 1 ).or.(ip>mglob) ) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
!call OMP_set_lock(ins_lck) idx(i) = lip
! We check again if the index is already in 'idxmap', this info = psb_success_
! time inside a critical region (we assume that the index else
! is often already existing). !$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc() tlip = lip
nxt = lidx(i) nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& if (nxt <= nrow) then
& idxmap%hashv,idxmap%glb_lc,ncol) idx(i) = -1
else
if (lip > 0) then call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
idx(i) = lip & idxmap%glb_lc,ncol)
else if (lip < 0) then if (lip > 0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) idx(i) = lip
lip = tlip
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 2'
call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
end if
else else
idx(i) = -1 call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
if (info >=0) then
if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(max(ncol,nxt))
endif
idx(i) = lip
info = psb_success_
else
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
end if end if
!call OMP_unset_lock(ins_lck) endif
end if !$omp end critical(hash_g2l_ins)
else
idx(i) = lip
end if end if
enddo
end do !$omp end parallel do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if(present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins) if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then if (mask(i)) then
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck) ncol = idxmap%get_lc()
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
!write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal() if (lip > 0) then
if (lip < 0) then idx(i) = lip
!call OMP_set_lock(ins_lck) info = psb_success_
! We check again if the index is already in 'idxmap', this else
! time inside a critical region (we assume that the index !$omp critical(hash_g2l_ins)
! is often already existing, so this lock is relatively rare). ncol = idxmap%get_lc()
ncol = idxmap%get_lc() nxt = ncol + 1
nxt = ncol + 1 call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
!write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal() & idxmap%glb_lc,ncol)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal()
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
! Index not found call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
!write(0,*) me,name,' b hsik ',psb_errstatus_fatal() if (info >=0) then
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) if (nxt == lip) then
if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num() call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
!write(0,*) me,name,' a hsik ',psb_errstatus_fatal()
lip = tlip
if (info >= 0) then
!write(0,*) 'Error before spot 3', info
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num()
if (info /= psb_success_) then if (info /= psb_success_) then
!write(0,*) 'Error spot 3', info info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
end if idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if end if
!call OMP_unset_lock(ins_lck)
end if end if
else !$omp end critical(hash_g2l_ins)
idx(i) = lip
end if end if
else else
idx(i) = -1 idx(i) = -1
end if end if
enddo
!$omp end parallel do
end do else if (.not.present(mask)) then
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then !$omp parallel do default(none) schedule(dynamic) &
goto 9999 !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
end if !$omp private(i,ip,lip,tlip,nxt,info) &
else !$omp reduction(.AND.:isLoopValid)
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
do i = 1, is do i = 1, is
info = 0 ip = idx(i)
if (.not. isLoopValid) cycle if ((ip < 1 ).or.(ip>mglob)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck) ncol = idxmap%get_lc()
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
!call OMP_set_lock(ins_lck) idx(i) = lip
! We check again if the index is already in 'idxmap', this info = psb_success_
! time inside a critical region (we assume that the index else
! is often already existing). !$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
nxt = ncol + 1 nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%glb_lc,ncol)
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
! Index not found call psb_hash_searchinskey(ip,lip,nxt,idxmap%hash,info)
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) if (info >=0) then
lip = tlip if (nxt == lip) then
call psb_ensure_size(nxt,idxmap%loc_to_glob,info,&
if (info >= 0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
!write(0,*) 'Error spot 4' info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
&a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
end if idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if end if
!call OMP_unset_lock(ins_lck)
end if end if
!$omp end critical(hash_g2l_ins)
else
idx(i) = lip
end if end if
enddo
end do !$omp end parallel do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
end if end if
end if end if
else else
@ -1047,7 +942,7 @@ contains
idx = -1 idx = -1
info = -1 info = -1
end if end if
!call OMP_destroy_lock(ins_lck) if (.not. isLoopValid) goto 9999
#else #else
!!$ else if (.not.use_openmp) then !!$ else if (.not.use_openmp) then
isLoopValid = .true. isLoopValid = .true.
@ -1066,13 +961,13 @@ contains
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip < 0) then
tlip = lip
nxt = lidx(i) nxt = lidx(i)
if (nxt <= nrow) then if (nxt <= nrow) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then if (info >=0) then
if (nxt == tlip) then if (nxt == tlip) then
ncol = max(ncol,nxt) ncol = max(ncol,nxt)
@ -1747,6 +1642,9 @@ contains
! for a width of psb_hash_bits ! for a width of psb_hash_bits
! !
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
if (mask(i)) then if (mask(i)) then
key = x(i) key = x(i)
@ -1784,7 +1682,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
key = x(i) key = x(i)
ih = iand(key,hashmask) ih = iand(key,hashmask)
@ -1820,6 +1722,7 @@ contains
x(i) = tmp x(i) = tmp
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_inner_cnv1 end subroutine hash_inner_cnv1
@ -1842,6 +1745,9 @@ contains
! for a width of psb_hash_bits ! for a width of psb_hash_bits
! !
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
if (mask(i)) then if (mask(i)) then
key = x(i) key = x(i)
@ -1882,9 +1788,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
key = x(i) key = x(i)
ih = iand(key,hashmask) ih = iand(key,hashmask)
@ -1923,6 +1832,7 @@ contains
y(i) = tmp y(i) = tmp
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_inner_cnv2 end subroutine hash_inner_cnv2

@ -383,12 +383,12 @@ contains
integer(psb_lpk_), intent(out) :: val integer(psb_lpk_), intent(out) :: val
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: hsize,hmask, hk, hd integer(psb_ipk_) :: hsize,hmask, hk, hd, i
logical :: redo
info = HashOK info = HashOK
hsize = hash%hsize hsize = hash%hsize
hmask = hash%hmask hmask = hash%hmask
val = -1
hk = iand(psb_hashval(key),hmask) hk = iand(psb_hashval(key),hmask)
if (hk == 0) then if (hk == 0) then
hd = 1 hd = 1
@ -400,56 +400,57 @@ contains
info = HashOutOfMemory info = HashOutOfMemory
return return
end if end if
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1 hash%nsrch = hash%nsrch + 1
!$omp end atomic
do do
!$omp atomic
hash%nacc = hash%nacc + 1 hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then if (hash%table(hk,1) == key) then
val = hash%table(hk,2) val = hash%table(hk,2)
info = HashDuplicate info = HashDuplicate
!write(0,*) 'In searchinskey 1 : ', info, HashDuplicate
return return
end if end if
redo = .false.
!$omp critical(hashsearchins) !$omp critical(hashsearchins)
if (hash%table(hk,1) == key) then if (hash%table(hk,1) == HashFreeEntry) then
val = hash%table(hk,2) if (hash%nk == hash%hsize -1) then
info = HashDuplicate !
else ! Note: because of the way we allocate things at CDALL
if (hash%table(hk,1) == HashFreeEntry) then ! time this is really unlikely; if we get here, we
if (hash%nk == hash%hsize -1) then ! have at least as many halo indices as internals, which
! ! means we're already in trouble. But we try to keep going.
! Note: because of the way we allocate things at CDALL !
! time this is really unlikely; if we get here, we call psb_hash_realloc(hash,info)
! have at least as many halo indices as internals, which if (info /= HashOk) then
! means we're already in trouble. But we try to keep going. info = HashOutOfMemory
!
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
!return
end if
else else
hash%nk = hash%nk + 1 redo = .true.
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
end if end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
info = HashOk
end if end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if end if
!$omp end critical(hashsearchins) !$omp end critical(hashsearchins)
if (info /= HashOk) then if (redo) then
write(0,*) 'In searchinskey 2: ', info call psb_hash_searchinskey(key,val,nextval,hash,info)
return return
end if end if
if (val > 0) return if (val > 0) exit
hk = hk - hd hk = hk - hd
if (hk < 0) hk = hk + hsize if (hk < 0) hk = hk + hsize
end do end do
!write(0,*) 'In searchinskey 3: ', info
end subroutine psb_hash_lsearchinskey end subroutine psb_hash_lsearchinskey
recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info) recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info)
@ -459,10 +460,11 @@ contains
integer(psb_ipk_) :: hsize,hmask, hk, hd integer(psb_ipk_) :: hsize,hmask, hk, hd
logical :: redo logical :: redo
info = HashOK info = HashOK
hsize = hash%hsize hsize = hash%hsize
hmask = hash%hmask hmask = hash%hmask
hk = iand(psb_hashval(key),hmask) hk = iand(psb_hashval(key),hmask)
if (hk == 0) then if (hk == 0) then
hd = 1 hd = 1
@ -475,17 +477,22 @@ contains
return return
end if end if
val = -1 val = -1
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1 hash%nsrch = hash%nsrch + 1
!$omp end atomic
do do
!$omp atomic
hash%nacc = hash%nacc + 1 hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then if (hash%table(hk,1) == key) then
val = hash%table(hk,2) val = hash%table(hk,2)
info = HashDuplicate info = HashDuplicate
return return
end if end if
redo = .false. redo = .false.
!$OMP CRITICAL !$omp critical(hashsearchins)
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then if (hash%nk == hash%hsize -1) then
! !
! Note: because of the way we allocate things at CDALL ! Note: because of the way we allocate things at CDALL
@ -496,24 +503,28 @@ contains
call psb_hash_realloc(hash,info) call psb_hash_realloc(hash,info)
if (info /= HashOk) then if (info /= HashOk) then
info = HashOutOfMemory info = HashOutOfMemory
!return
else else
redo = .true. redo = .true.
!!$ call psb_hash_searchinskey(key,val,nextval,hash,info)
!!$ return
end if end if
else else
hash%nk = hash%nk + 1 hash%nk = hash%nk + 1
hash%table(hk,1) = key hash%table(hk,1) = key
hash%table(hk,2) = nextval hash%table(hk,2) = nextval
val = nextval val = nextval
!return info = HashOk
end if end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if end if
!$OMP END CRITICAL !$omp end critical(hashsearchins)
if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info) if (redo) then
if (info /= HashOk) return call psb_hash_searchinskey(key,val,nextval,hash,info)
if (val > 0) return return
end if
if (val > 0) exit
hk = hk - hd hk = hk - hd
if (hk < 0) hk = hk + hsize if (hk < 0) hk = hk + hsize
end do end do
@ -551,7 +562,7 @@ contains
end if end if
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry val = HashFreeEntry
! !$ info = HashNotFound info = HashNotFound
return return
end if end if
hk = hk - hd hk = hk - hd
@ -591,7 +602,7 @@ contains
end if end if
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry val = HashFreeEntry
! !$ info = HashNotFound info = HashNotFound
return return
end if end if
hk = hk - hd hk = hk - hd

@ -158,6 +158,7 @@ module psb_indx_map_mod
procedure, pass(idxmap) :: set_lr => base_set_lr procedure, pass(idxmap) :: set_lr => base_set_lr
procedure, pass(idxmap) :: set_lc => base_set_lc procedure, pass(idxmap) :: set_lc => base_set_lc
procedure, pass(idxmap) :: inc_lc => base_inc_lc
procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy
procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy
@ -235,7 +236,7 @@ module psb_indx_map_mod
& base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,&
& base_get_mpic, base_sizeof, base_set_null, & & base_get_mpic, base_sizeof, base_set_null, &
& base_set_grl, base_set_gcl, & & base_set_grl, base_set_gcl, &
& base_set_lr, base_set_lc, base_set_ctxt,& & base_set_lr, base_set_lc, base_inc_lc, base_set_ctxt,&
& base_set_mpic, base_get_fmt, base_asb, base_free,& & base_set_mpic, base_get_fmt, base_asb, base_free,&
& base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,&
& base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,&
@ -573,6 +574,14 @@ contains
idxmap%local_cols = val idxmap%local_cols = val
end subroutine base_set_lc end subroutine base_set_lc
subroutine base_inc_lc(idxmap)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
!$omp atomic
idxmap%local_cols = idxmap%local_cols + 1
!$omp end atomic
end subroutine base_inc_lc
subroutine base_set_p_adjcncy(idxmap,val) subroutine base_set_p_adjcncy(idxmap,val)
use psb_realloc_mod use psb_realloc_mod
use psb_sort_mod use psb_sort_mod

@ -178,7 +178,10 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
@ -191,9 +194,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
idx(i) = idxmap%loc_to_glob(idx(i)) idx(i) = idxmap%loc_to_glob(idx(i))
@ -204,7 +210,8 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine list_ll2gv1 end subroutine list_ll2gv1
@ -298,6 +305,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -309,6 +319,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -317,6 +328,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is do i=1, is
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i)) ix = idxmap%glob_to_loc(idx(i))
@ -326,6 +340,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -365,6 +380,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -376,6 +394,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
@ -384,6 +403,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is do i=1, is
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i)) ix = idxmap%glob_to_loc(idxin(i))
@ -393,6 +415,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
@ -541,6 +564,10 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,laddsz,lidx) &
!$omp private(i,ix,info)
! $ o m p reduction(.AND.:isLoopValid)
do i=1, is do i=1, is
if (info /= 0) cycle if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -579,8 +606,8 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then

@ -169,7 +169,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -179,9 +181,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
! do nothing ! do nothing
@ -189,7 +193,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine repl_l2gv1 end subroutine repl_l2gv1
@ -223,7 +227,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -233,9 +239,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -243,7 +251,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
if (is > im) info = -3 if (is > im) info = -3
@ -324,6 +332,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -333,7 +344,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -344,6 +359,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -352,6 +368,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -359,7 +378,11 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1,is do i=1,is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -367,6 +390,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -409,6 +433,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -418,7 +445,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap) &
!$omp private(i)
do i=1,im do i=1,im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -428,6 +459,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:im) = -1 idxout(1:im) = -1
info = -1 info = -1
@ -436,6 +468,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -443,7 +478,11 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap) &
!$omp private(i)
do i=1,im do i=1,im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -451,6 +490,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:im) = -1 idxout(1:im) = -1
info = -1 info = -1
@ -557,6 +597,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -566,9 +609,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -576,9 +621,13 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -588,8 +637,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -597,6 +649,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end if end if
else else
@ -644,6 +697,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -653,9 +709,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -663,9 +721,13 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -675,8 +737,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -684,6 +749,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end if end if
else else

@ -2051,8 +2051,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_c_base_vect_mv end subroutine psb_c_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync()
call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier !$omp barrier
if (omp_get_thread_num() /= 0) then if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_d_base_vect_mv end subroutine psb_d_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync()
call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier !$omp barrier
if (omp_get_thread_num() /= 0) then if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_s_base_vect_mv end subroutine psb_s_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync()
call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier !$omp barrier
if (omp_get_thread_num() /= 0) then if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -2051,8 +2051,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_z_base_vect_mv end subroutine psb_z_base_vect_mv
@ -2105,8 +2105,6 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync()
call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2206,8 +2204,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -2171,7 +2171,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2336,7 +2336,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2469,7 +2469,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4066,7 +4066,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3819,6 +3819,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3829,12 +3830,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3890,15 +3898,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3906,9 +3913,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3944,6 +3952,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -4010,14 +4019,14 @@ contains
!$omp barrier !$omp barrier
if (omp_get_thread_num() /= 0) then if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4025,9 +4034,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -6296,7 +6306,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

Loading…
Cancel
Save