Improve implementation of fix_coo using exscan

omp-threadsafe
sfilippone 2 years ago
parent 91d3e66547
commit 40cc78854a

@ -1866,18 +1866,6 @@ module psb_c_base_mat_mod
end subroutine psb_c_fix_coo_inner end subroutine psb_c_fix_coo_inner
end interface end interface
interface
subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_fix_coo_inner_colmajor
end interface
interface interface
subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info) & ia,ja,val,iaux,nzout,info)

@ -1866,18 +1866,6 @@ module psb_d_base_mat_mod
end subroutine psb_d_fix_coo_inner end subroutine psb_d_fix_coo_inner
end interface end interface
interface
subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_fix_coo_inner_colmajor
end interface
interface interface
subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info) & ia,ja,val,iaux,nzout,info)

@ -1866,18 +1866,6 @@ module psb_s_base_mat_mod
end subroutine psb_s_fix_coo_inner end subroutine psb_s_fix_coo_inner
end interface end interface
interface
subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_fix_coo_inner_colmajor
end interface
interface interface
subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info) & ia,ja,val,iaux,nzout,info)

@ -1866,18 +1866,6 @@ module psb_z_base_mat_mod
end subroutine psb_z_fix_coo_inner end subroutine psb_z_fix_coo_inner
end interface end interface
interface
subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info)
import
integer(psb_ipk_), intent(in) :: nr,nc,nzin,dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_fix_coo_inner_colmajor
end interface
interface interface
subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,& subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,&
& ia,ja,val,iaux,nzout,info) & ia,ja,val,iaux,nzout,info)

@ -3573,7 +3573,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3632,8 +3632,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! Dirty trick: call ROWMAJOR with rows <-> columns ! Dirty trick: call ROWMAJOR with rows <-> columns
call psb_c_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,& call psb_c_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,&
& ja,ia,val,iaux,nzout,info) & ja,ia,val,iaux,nzout,info)
!!$ call psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
!!$ & ia,ja,val,iaux,nzout,info)
case default case default
write(debug_unit,*) trim(name),': unknown direction ',idir_ write(debug_unit,*) trim(name),': unknown direction ',idir_
info = psb_err_internal_error_ info = psb_err_internal_error_
@ -3677,7 +3675,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3760,8 +3758,8 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row) !$OMP first_elem,last_elem,nzl,iret,act_row) reduction(max: info)
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
@ -3774,72 +3772,32 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
!-------------------------------------------
! ------------ iaux composition --------------
! 'iaux' will have the start index for each row
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=first_idx,last_idx do j=idxstart,idxend
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= first_idx) .and. (act_row <= last_idx)) then if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i) ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i) jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i) vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if end if
end do end do
@ -3848,7 +3806,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=first_idx,last_idx do j=idxstart,idxend
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -3877,45 +3835,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
!$OMP SINGLE call psi_exscan(nr+1,kaux,i,shift=ione)
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
kaux(nr+1) = sum(nthreads+1)
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER !$OMP BARRIER
@ -3923,7 +3843,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
select case(dupl) select case(dupl)
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3952,7 +3872,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_add_) case(psb_dupl_add_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3981,7 +3901,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_err_) case(psb_dupl_err_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -4186,7 +4106,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
!$OMP SINGLE !$OMP SINGLE
@ -4200,22 +4120,22 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
last_elem = -1 last_elem = -1
act_row = first_idx act_row = idxstart
do j=1,nzin do j=1,nzin
if (ia(j) < act_row) then if (ia(j) < act_row) then
cycle cycle
else if ((ia(j) > last_idx) .or. (work < 1)) then else if ((ia(j) > idxend) .or. (work < 1)) then
exit exit
else if (ia(j) > act_row) then else if (ia(j) > act_row) then
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4345,676 +4265,6 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end subroutine psb_c_fix_coo_inner_rowmajor end subroutine psb_c_fix_coo_inner_rowmajor
subroutine psb_c_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info)
use psb_const_mod
use psb_error_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_fix_coo_inner_colmajor
use psb_string_mod
use psb_ip_reord_mod
use psb_sort_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout, info
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers
#if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
if (nc <= nzin) then
! Avoid strange situations with large indices
#if defined(OPENMP)
allocate(ias(nzin),jas(nzin),vs(nzin), stat=info)
#else
allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info)
#endif
use_buffers = (info == 0)
else
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP)
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ja,nc,iaux) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
! Invalid indices are placed outside the considered range
ja(i) = nc+2
else
!$OMP ATOMIC UPDATE
iaux(ja(i)) = iaux(ja(i)) + 1
end if
end do
!$OMP END PARALLEL DO
#else
!srt_inp = .true.
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
!ja(i) = nc+2
!srt_inp = .false.
exit
end if
iaux(ja(i)) = iaux(ja(i)) + 1
!srt_inp = srt_inp .and.(ja(i-1)<=ja(i))
end do
#endif
end if
!use_buffers=use_buffers.and.srt_inp
! Check again use_buffers. We enter here if nzin >= nc and
! all the indices are valid
if (use_buffers) then
#if defined(OPENMP)
allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
kaux(:) = 0
sum(:) = 0
sum(1) = 1
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each column. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_col)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
!-------------------------------------------
! ---------- iaux composition --------------
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!$OMP BARRIER
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer to get indices
do j=first_idx,last_idx
idxaux(j) = iaux(j)
end do
! Here we sort data inside the auxiliary buffers
do i=1,nzin
act_col = ja(i)
if (act_col >= first_idx .and. act_col <= last_idx) then
ias(idxaux(act_col)) = ia(i)
jas(idxaux(act_col)) = ja(i)
vs(idxaux(act_col)) = val(i)
idxaux(act_col) = idxaux(act_col) + 1
end if
end do
!$OMP BARRIER
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=first_idx,last_idx
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
! The column has elements?
if (nzl > 0) then
call psi_msort_up(nzl,ias(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
! Over each column we count the unique values
kaux(j) = 1
do i=first_elem+1,last_elem
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
cycle
end if
kaux(j) = kaux(j) + 1
end do
end if
end do
! --------------------------------------------------
! ---------------- kaux composition ----------------
!$OMP SINGLE
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER
! ------------------------------------------------
select case(dupl)
case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_add_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = val(k) + vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_err_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
err = 1
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
end if
end do
end do
!$OMP END DO
case default
!$OMP SINGLE
err = 2
!$OMP END SINGLE
end select
!$OMP END PARALLEL
if (err == 1) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else if (err == 2) then
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end if
nzout = kaux(nc+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
ip = iaux(1)
iaux(1) = 0
do i=2, nc
is = iaux(i)
iaux(i) = ip
ip = ip + is
end do
iaux(nc+1) = ip
do i=1,nzin
icl = ja(i)
ip = iaux(icl) + 1
ias(ip) = ia(i)
jas(ip) = ja(i)
vs(ip) = val(i)
iaux(icl) = ip
end do
!end if
select case(dupl)
case(psb_dupl_ovwrt_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_add_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = val(k) + vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_err_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end select
nzout = k
deallocate(ix2, stat=info)
#endif
deallocate(ias,jas,vs, stat=info)
else if (.not.use_buffers) then
call psi_msort_up(nzin,ja(1:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_col, &
!$OMP iret,ithread,work,first_elem,last_elem)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
! ---------------------------------------------------
first_elem = 0
last_elem = -1
act_col = first_idx
do j=1,nzin
if (ja(j) < act_col) then
cycle
else if ((ja(j) > last_idx) .or. (work < 1)) then
exit
else if (ja(j) > act_col) then
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
act_col = act_col + 1
first_elem = 0
last_elem = -1
else
if (first_elem == 0) then
first_elem = j
end if
last_elem = j
end if
end do
!$OMP END PARALLEL
#else
i = 1
j = i
do while (i <= nzin)
do while ((ja(j) == ja(i)))
j = j+1
if (j > nzin) exit
enddo
nzl = j - i
call psi_msort_up(nzl,ia(i:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(i:i+nzl-1),&
& ia(i:i+nzl-1),ja(i:i+nzl-1),iaux)
i = j
enddo
#endif
i = 1
irw = ia(i)
icl = ja(i)
j = 1
select case(dupl)
case(psb_dupl_ovwrt_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_add_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(i) + val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_err_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
end select
nzout = i
if (debug_level >= psb_debug_serial_)&
& write(debug_unit,*) trim(name),': end second loop'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_fix_coo_inner_colmajor
subroutine psb_c_cp_coo_to_lcoo(a,b,info) subroutine psb_c_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
@ -8347,3 +7597,4 @@ subroutine psb_lc_cp_coo_from_icoo(a,b,info)
return return
end subroutine psb_lc_cp_coo_from_icoo end subroutine psb_lc_cp_coo_from_icoo

@ -3573,7 +3573,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3632,8 +3632,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! Dirty trick: call ROWMAJOR with rows <-> columns ! Dirty trick: call ROWMAJOR with rows <-> columns
call psb_d_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,& call psb_d_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,&
& ja,ia,val,iaux,nzout,info) & ja,ia,val,iaux,nzout,info)
!!$ call psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
!!$ & ia,ja,val,iaux,nzout,info)
case default case default
write(debug_unit,*) trim(name),': unknown direction ',idir_ write(debug_unit,*) trim(name),': unknown direction ',idir_
info = psb_err_internal_error_ info = psb_err_internal_error_
@ -3677,7 +3675,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3760,8 +3758,8 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row) !$OMP first_elem,last_elem,nzl,iret,act_row) reduction(max: info)
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
@ -3774,72 +3772,32 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
!-------------------------------------------
! ------------ iaux composition --------------
! 'iaux' will have the start index for each row
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=first_idx,last_idx do j=idxstart,idxend
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= first_idx) .and. (act_row <= last_idx)) then if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i) ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i) jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i) vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if end if
end do end do
@ -3848,7 +3806,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=first_idx,last_idx do j=idxstart,idxend
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -3877,45 +3835,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
!$OMP SINGLE call psi_exscan(nr+1,kaux,i,shift=ione)
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
kaux(nr+1) = sum(nthreads+1)
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER !$OMP BARRIER
@ -3923,7 +3843,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
select case(dupl) select case(dupl)
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3952,7 +3872,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_add_) case(psb_dupl_add_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3981,7 +3901,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_err_) case(psb_dupl_err_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -4186,7 +4106,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
!$OMP SINGLE !$OMP SINGLE
@ -4200,22 +4120,22 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
last_elem = -1 last_elem = -1
act_row = first_idx act_row = idxstart
do j=1,nzin do j=1,nzin
if (ia(j) < act_row) then if (ia(j) < act_row) then
cycle cycle
else if ((ia(j) > last_idx) .or. (work < 1)) then else if ((ia(j) > idxend) .or. (work < 1)) then
exit exit
else if (ia(j) > act_row) then else if (ia(j) > act_row) then
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4345,676 +4265,6 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end subroutine psb_d_fix_coo_inner_rowmajor end subroutine psb_d_fix_coo_inner_rowmajor
subroutine psb_d_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info)
use psb_const_mod
use psb_error_mod
use psb_d_base_mat_mod, psb_protect_name => psb_d_fix_coo_inner_colmajor
use psb_string_mod
use psb_ip_reord_mod
use psb_sort_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout, info
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers
#if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
if (nc <= nzin) then
! Avoid strange situations with large indices
#if defined(OPENMP)
allocate(ias(nzin),jas(nzin),vs(nzin), stat=info)
#else
allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info)
#endif
use_buffers = (info == 0)
else
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP)
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ja,nc,iaux) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
! Invalid indices are placed outside the considered range
ja(i) = nc+2
else
!$OMP ATOMIC UPDATE
iaux(ja(i)) = iaux(ja(i)) + 1
end if
end do
!$OMP END PARALLEL DO
#else
!srt_inp = .true.
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
!ja(i) = nc+2
!srt_inp = .false.
exit
end if
iaux(ja(i)) = iaux(ja(i)) + 1
!srt_inp = srt_inp .and.(ja(i-1)<=ja(i))
end do
#endif
end if
!use_buffers=use_buffers.and.srt_inp
! Check again use_buffers. We enter here if nzin >= nc and
! all the indices are valid
if (use_buffers) then
#if defined(OPENMP)
allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
kaux(:) = 0
sum(:) = 0
sum(1) = 1
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each column. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_col)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
!-------------------------------------------
! ---------- iaux composition --------------
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!$OMP BARRIER
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer to get indices
do j=first_idx,last_idx
idxaux(j) = iaux(j)
end do
! Here we sort data inside the auxiliary buffers
do i=1,nzin
act_col = ja(i)
if (act_col >= first_idx .and. act_col <= last_idx) then
ias(idxaux(act_col)) = ia(i)
jas(idxaux(act_col)) = ja(i)
vs(idxaux(act_col)) = val(i)
idxaux(act_col) = idxaux(act_col) + 1
end if
end do
!$OMP BARRIER
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=first_idx,last_idx
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
! The column has elements?
if (nzl > 0) then
call psi_msort_up(nzl,ias(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
! Over each column we count the unique values
kaux(j) = 1
do i=first_elem+1,last_elem
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
cycle
end if
kaux(j) = kaux(j) + 1
end do
end if
end do
! --------------------------------------------------
! ---------------- kaux composition ----------------
!$OMP SINGLE
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER
! ------------------------------------------------
select case(dupl)
case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_add_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = val(k) + vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_err_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
err = 1
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
end if
end do
end do
!$OMP END DO
case default
!$OMP SINGLE
err = 2
!$OMP END SINGLE
end select
!$OMP END PARALLEL
if (err == 1) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else if (err == 2) then
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end if
nzout = kaux(nc+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
ip = iaux(1)
iaux(1) = 0
do i=2, nc
is = iaux(i)
iaux(i) = ip
ip = ip + is
end do
iaux(nc+1) = ip
do i=1,nzin
icl = ja(i)
ip = iaux(icl) + 1
ias(ip) = ia(i)
jas(ip) = ja(i)
vs(ip) = val(i)
iaux(icl) = ip
end do
!end if
select case(dupl)
case(psb_dupl_ovwrt_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_add_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = val(k) + vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_err_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end select
nzout = k
deallocate(ix2, stat=info)
#endif
deallocate(ias,jas,vs, stat=info)
else if (.not.use_buffers) then
call psi_msort_up(nzin,ja(1:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_col, &
!$OMP iret,ithread,work,first_elem,last_elem)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
! ---------------------------------------------------
first_elem = 0
last_elem = -1
act_col = first_idx
do j=1,nzin
if (ja(j) < act_col) then
cycle
else if ((ja(j) > last_idx) .or. (work < 1)) then
exit
else if (ja(j) > act_col) then
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
act_col = act_col + 1
first_elem = 0
last_elem = -1
else
if (first_elem == 0) then
first_elem = j
end if
last_elem = j
end if
end do
!$OMP END PARALLEL
#else
i = 1
j = i
do while (i <= nzin)
do while ((ja(j) == ja(i)))
j = j+1
if (j > nzin) exit
enddo
nzl = j - i
call psi_msort_up(nzl,ia(i:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(i:i+nzl-1),&
& ia(i:i+nzl-1),ja(i:i+nzl-1),iaux)
i = j
enddo
#endif
i = 1
irw = ia(i)
icl = ja(i)
j = 1
select case(dupl)
case(psb_dupl_ovwrt_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_add_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(i) + val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_err_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
end select
nzout = i
if (debug_level >= psb_debug_serial_)&
& write(debug_unit,*) trim(name),': end second loop'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_fix_coo_inner_colmajor
subroutine psb_d_cp_coo_to_lcoo(a,b,info) subroutine psb_d_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
@ -8347,3 +7597,4 @@ subroutine psb_ld_cp_coo_from_icoo(a,b,info)
return return
end subroutine psb_ld_cp_coo_from_icoo end subroutine psb_ld_cp_coo_from_icoo

@ -3573,7 +3573,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3632,8 +3632,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! Dirty trick: call ROWMAJOR with rows <-> columns ! Dirty trick: call ROWMAJOR with rows <-> columns
call psb_s_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,& call psb_s_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,&
& ja,ia,val,iaux,nzout,info) & ja,ia,val,iaux,nzout,info)
!!$ call psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
!!$ & ia,ja,val,iaux,nzout,info)
case default case default
write(debug_unit,*) trim(name),': unknown direction ',idir_ write(debug_unit,*) trim(name),': unknown direction ',idir_
info = psb_err_internal_error_ info = psb_err_internal_error_
@ -3677,7 +3675,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3760,8 +3758,8 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row) !$OMP first_elem,last_elem,nzl,iret,act_row) reduction(max: info)
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
@ -3774,72 +3772,32 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
!-------------------------------------------
! ------------ iaux composition --------------
! 'iaux' will have the start index for each row
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=first_idx,last_idx do j=idxstart,idxend
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= first_idx) .and. (act_row <= last_idx)) then if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i) ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i) jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i) vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if end if
end do end do
@ -3848,7 +3806,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=first_idx,last_idx do j=idxstart,idxend
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -3877,45 +3835,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
!$OMP SINGLE call psi_exscan(nr+1,kaux,i,shift=ione)
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
kaux(nr+1) = sum(nthreads+1)
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER !$OMP BARRIER
@ -3923,7 +3843,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
select case(dupl) select case(dupl)
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3952,7 +3872,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_add_) case(psb_dupl_add_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3981,7 +3901,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_err_) case(psb_dupl_err_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -4186,7 +4106,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
!$OMP SINGLE !$OMP SINGLE
@ -4200,22 +4120,22 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
last_elem = -1 last_elem = -1
act_row = first_idx act_row = idxstart
do j=1,nzin do j=1,nzin
if (ia(j) < act_row) then if (ia(j) < act_row) then
cycle cycle
else if ((ia(j) > last_idx) .or. (work < 1)) then else if ((ia(j) > idxend) .or. (work < 1)) then
exit exit
else if (ia(j) > act_row) then else if (ia(j) > act_row) then
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4345,676 +4265,6 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end subroutine psb_s_fix_coo_inner_rowmajor end subroutine psb_s_fix_coo_inner_rowmajor
subroutine psb_s_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info)
use psb_const_mod
use psb_error_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_fix_coo_inner_colmajor
use psb_string_mod
use psb_ip_reord_mod
use psb_sort_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
real(psb_spk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout, info
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers
#if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
if (nc <= nzin) then
! Avoid strange situations with large indices
#if defined(OPENMP)
allocate(ias(nzin),jas(nzin),vs(nzin), stat=info)
#else
allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info)
#endif
use_buffers = (info == 0)
else
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP)
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ja,nc,iaux) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
! Invalid indices are placed outside the considered range
ja(i) = nc+2
else
!$OMP ATOMIC UPDATE
iaux(ja(i)) = iaux(ja(i)) + 1
end if
end do
!$OMP END PARALLEL DO
#else
!srt_inp = .true.
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
!ja(i) = nc+2
!srt_inp = .false.
exit
end if
iaux(ja(i)) = iaux(ja(i)) + 1
!srt_inp = srt_inp .and.(ja(i-1)<=ja(i))
end do
#endif
end if
!use_buffers=use_buffers.and.srt_inp
! Check again use_buffers. We enter here if nzin >= nc and
! all the indices are valid
if (use_buffers) then
#if defined(OPENMP)
allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
kaux(:) = 0
sum(:) = 0
sum(1) = 1
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each column. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_col)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
!-------------------------------------------
! ---------- iaux composition --------------
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!$OMP BARRIER
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer to get indices
do j=first_idx,last_idx
idxaux(j) = iaux(j)
end do
! Here we sort data inside the auxiliary buffers
do i=1,nzin
act_col = ja(i)
if (act_col >= first_idx .and. act_col <= last_idx) then
ias(idxaux(act_col)) = ia(i)
jas(idxaux(act_col)) = ja(i)
vs(idxaux(act_col)) = val(i)
idxaux(act_col) = idxaux(act_col) + 1
end if
end do
!$OMP BARRIER
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=first_idx,last_idx
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
! The column has elements?
if (nzl > 0) then
call psi_msort_up(nzl,ias(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
! Over each column we count the unique values
kaux(j) = 1
do i=first_elem+1,last_elem
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
cycle
end if
kaux(j) = kaux(j) + 1
end do
end if
end do
! --------------------------------------------------
! ---------------- kaux composition ----------------
!$OMP SINGLE
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER
! ------------------------------------------------
select case(dupl)
case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_add_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = val(k) + vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_err_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
err = 1
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
end if
end do
end do
!$OMP END DO
case default
!$OMP SINGLE
err = 2
!$OMP END SINGLE
end select
!$OMP END PARALLEL
if (err == 1) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else if (err == 2) then
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end if
nzout = kaux(nc+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
ip = iaux(1)
iaux(1) = 0
do i=2, nc
is = iaux(i)
iaux(i) = ip
ip = ip + is
end do
iaux(nc+1) = ip
do i=1,nzin
icl = ja(i)
ip = iaux(icl) + 1
ias(ip) = ia(i)
jas(ip) = ja(i)
vs(ip) = val(i)
iaux(icl) = ip
end do
!end if
select case(dupl)
case(psb_dupl_ovwrt_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_add_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = val(k) + vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_err_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end select
nzout = k
deallocate(ix2, stat=info)
#endif
deallocate(ias,jas,vs, stat=info)
else if (.not.use_buffers) then
call psi_msort_up(nzin,ja(1:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_col, &
!$OMP iret,ithread,work,first_elem,last_elem)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
! ---------------------------------------------------
first_elem = 0
last_elem = -1
act_col = first_idx
do j=1,nzin
if (ja(j) < act_col) then
cycle
else if ((ja(j) > last_idx) .or. (work < 1)) then
exit
else if (ja(j) > act_col) then
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
act_col = act_col + 1
first_elem = 0
last_elem = -1
else
if (first_elem == 0) then
first_elem = j
end if
last_elem = j
end if
end do
!$OMP END PARALLEL
#else
i = 1
j = i
do while (i <= nzin)
do while ((ja(j) == ja(i)))
j = j+1
if (j > nzin) exit
enddo
nzl = j - i
call psi_msort_up(nzl,ia(i:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(i:i+nzl-1),&
& ia(i:i+nzl-1),ja(i:i+nzl-1),iaux)
i = j
enddo
#endif
i = 1
irw = ia(i)
icl = ja(i)
j = 1
select case(dupl)
case(psb_dupl_ovwrt_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_add_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(i) + val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_err_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
end select
nzout = i
if (debug_level >= psb_debug_serial_)&
& write(debug_unit,*) trim(name),': end second loop'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_fix_coo_inner_colmajor
subroutine psb_s_cp_coo_to_lcoo(a,b,info) subroutine psb_s_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
@ -8347,3 +7597,4 @@ subroutine psb_ls_cp_coo_from_icoo(a,b,info)
return return
end subroutine psb_ls_cp_coo_from_icoo end subroutine psb_ls_cp_coo_from_icoo

@ -3573,7 +3573,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3632,8 +3632,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! Dirty trick: call ROWMAJOR with rows <-> columns ! Dirty trick: call ROWMAJOR with rows <-> columns
call psb_z_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,& call psb_z_fix_coo_inner_rowmajor(nc,nr,nzin,dupl,&
& ja,ia,val,iaux,nzout,info) & ja,ia,val,iaux,nzout,info)
!!$ call psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,&
!!$ & ia,ja,val,iaux,nzout,info)
case default case default
write(debug_unit,*) trim(name),': unknown direction ',idir_ write(debug_unit,*) trim(name),': unknown direction ',idir_
info = psb_err_internal_error_ info = psb_err_internal_error_
@ -3677,7 +3675,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers logical :: srt_inp, use_buffers
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
@ -3760,8 +3758,8 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row) !$OMP first_elem,last_elem,nzl,iret,act_row) reduction(max: info)
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
@ -3774,72 +3772,32 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
!-------------------------------------------
! ------------ iaux composition --------------
! 'iaux' will have the start index for each row
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=first_idx,last_idx do j=idxstart,idxend
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= first_idx) .and. (act_row <= last_idx)) then if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i) ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i) jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i) vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if end if
end do end do
@ -3848,7 +3806,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=first_idx,last_idx do j=idxstart,idxend
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -3877,45 +3835,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
!$OMP SINGLE call psi_exscan(nr+1,kaux,i,shift=ione)
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
kaux(nr+1) = sum(nthreads+1)
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER !$OMP BARRIER
@ -3923,7 +3843,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
select case(dupl) select case(dupl)
case(psb_dupl_ovwrt_) case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3952,7 +3872,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_add_) case(psb_dupl_add_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -3981,7 +3901,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!$OMP END DO !$OMP END DO
case(psb_dupl_err_) case(psb_dupl_err_)
!$OMP DO schedule(STATIC) !$OMP DO schedule(dynamic,32)
do j=1,nr do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
@ -4186,7 +4106,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
!$OMP SINGLE !$OMP SINGLE
@ -4200,22 +4120,22 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
work = nr/nthreads work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 idxstart = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 idxstart = ithread*work + MOD(nr,nthreads) + 1
end if end if
last_idx = first_idx + work - 1 idxend = idxstart + work - 1
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
last_elem = -1 last_elem = -1
act_row = first_idx act_row = idxstart
do j=1,nzin do j=1,nzin
if (ia(j) < act_row) then if (ia(j) < act_row) then
cycle cycle
else if ((ia(j) > last_idx) .or. (work < 1)) then else if ((ia(j) > idxend) .or. (work < 1)) then
exit exit
else if (ia(j) > act_row) then else if (ia(j) > act_row) then
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4345,676 +4265,6 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end subroutine psb_z_fix_coo_inner_rowmajor end subroutine psb_z_fix_coo_inner_rowmajor
subroutine psb_z_fix_coo_inner_colmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,info)
use psb_const_mod
use psb_error_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_fix_coo_inner_colmajor
use psb_string_mod
use psb_ip_reord_mod
use psb_sort_mod
#if defined(OPENMP)
use omp_lib
#endif
implicit none
integer(psb_ipk_), intent(in) :: nr, nc, nzin, dupl
integer(psb_ipk_), intent(inout) :: ia(:), ja(:), iaux(:)
complex(psb_dpk_), intent(inout) :: val(:)
integer(psb_ipk_), intent(out) :: nzout, info
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
logical :: srt_inp, use_buffers
#if defined(OPENMP)
integer(psb_ipk_) :: work,first_idx,last_idx,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
if (nc <= nzin) then
! Avoid strange situations with large indices
#if defined(OPENMP)
allocate(ias(nzin),jas(nzin),vs(nzin), stat=info)
#else
allocate(ias(nzin),jas(nzin),vs(nzin),ix2(nzin+2), stat=info)
#endif
use_buffers = (info == 0)
else
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP)
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ja,nc,iaux) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
! Invalid indices are placed outside the considered range
ja(i) = nc+2
else
!$OMP ATOMIC UPDATE
iaux(ja(i)) = iaux(ja(i)) + 1
end if
end do
!$OMP END PARALLEL DO
#else
!srt_inp = .true.
do i=1,nzin
if ((ja(i) < 1).or.(ja(i) > nc)) then
use_buffers = .false.
!ja(i) = nc+2
!srt_inp = .false.
exit
end if
iaux(ja(i)) = iaux(ja(i)) + 1
!srt_inp = srt_inp .and.(ja(i-1)<=ja(i))
end do
#endif
end if
!use_buffers=use_buffers.and.srt_inp
! Check again use_buffers. We enter here if nzin >= nc and
! all the indices are valid
if (use_buffers) then
#if defined(OPENMP)
allocate(kaux(MAX(nzin,nc)+2),idxaux(MAX((nr+2)*maxthreads,nc)),sum(maxthreads+1),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
kaux(:) = 0
sum(:) = 0
sum(1) = 1
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each column. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,first_idx,last_idx,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_col)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
!-------------------------------------------
! ---------- iaux composition --------------
s = 0
do i=first_idx,last_idx
s = s + iaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = iaux(first_idx)
end if
if (ithread == 0) then
iaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = iaux(first_idx+1)
iaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = iaux(i)
iaux(i) = iaux(i-1) + old_val
old_val = nxt_val
end do
! --------------------------------------
!$OMP BARRIER
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer to get indices
do j=first_idx,last_idx
idxaux(j) = iaux(j)
end do
! Here we sort data inside the auxiliary buffers
do i=1,nzin
act_col = ja(i)
if (act_col >= first_idx .and. act_col <= last_idx) then
ias(idxaux(act_col)) = ia(i)
jas(idxaux(act_col)) = ja(i)
vs(idxaux(act_col)) = val(i)
idxaux(act_col) = idxaux(act_col) + 1
end if
end do
!$OMP BARRIER
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=first_idx,last_idx
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
! The column has elements?
if (nzl > 0) then
call psi_msort_up(nzl,ias(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
! Over each column we count the unique values
kaux(j) = 1
do i=first_elem+1,last_elem
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
cycle
end if
kaux(j) = kaux(j) + 1
end do
end if
end do
! --------------------------------------------------
! ---------------- kaux composition ----------------
!$OMP SINGLE
sum(:) = 0
sum(1) = 1
!$OMP END SINGLE
s = 0
do i=first_idx,last_idx
s = s + kaux(i)
end do
sum(ithread+2) = s
!$OMP BARRIER
!$OMP SINGLE
do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1)
end do
!$OMP END SINGLE
if (work > 0) then
saved_elem = kaux(first_idx)
end if
if (ithread == 0) then
kaux(1) = 1
end if
!$OMP BARRIER
if (work > 0) then
old_val = kaux(first_idx+1)
kaux(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1
nxt_val = kaux(i)
kaux(i) = kaux(i-1) + old_val
old_val = nxt_val
end do
!$OMP BARRIER
! ------------------------------------------------
select case(dupl)
case(psb_dupl_ovwrt_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_add_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
val(k) = val(k) + vs(i)
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
endif
end do
end do
!$OMP END DO
case(psb_dupl_err_)
!$OMP DO schedule(STATIC)
do j=1,nc
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
if (first_elem > last_elem) then
cycle
end if
k = kaux(j)
val(k) = vs(first_elem)
ia(k) = ias(first_elem)
ja(k) = jas(first_elem)
do i=first_elem+1,last_elem
if ((ias(i) == ias(i-1)).and.(jas(i) == jas(i-1))) then
err = 1
else
k = k + 1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
end if
end do
end do
!$OMP END DO
case default
!$OMP SINGLE
err = 2
!$OMP END SINGLE
end select
!$OMP END PARALLEL
if (err == 1) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else if (err == 2) then
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end if
nzout = kaux(nc+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
ip = iaux(1)
iaux(1) = 0
do i=2, nc
is = iaux(i)
iaux(i) = ip
ip = ip + is
end do
iaux(nc+1) = ip
do i=1,nzin
icl = ja(i)
ip = iaux(icl) + 1
ias(ip) = ia(i)
jas(ip) = ja(i)
vs(ip) = val(i)
iaux(icl) = ip
end do
!end if
select case(dupl)
case(psb_dupl_ovwrt_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_add_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
val(k) = val(k) + vs(i)
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case(psb_dupl_err_)
k = 0
i = 1
do j=1, nc
nzl = iaux(j)-i+1
imx = i+nzl-1
if (nzl > 0) then
call psi_msort_up(nzl,ias(i:imx),ix2,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,vs(i:imx),&
& ias(i:imx),jas(i:imx),ix2)
k = k + 1
ia(k) = ias(i)
ja(k) = jas(i)
val(k) = vs(i)
irw = ia(k)
icl = ja(k)
do
i = i + 1
if (i > imx) exit
if ((ias(i) == irw).and.(jas(i) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
k = k+1
val(k) = vs(i)
ia(k) = ias(i)
ja(k) = jas(i)
irw = ia(k)
icl = ja(k)
endif
enddo
end if
end do
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
return
end select
nzout = k
deallocate(ix2, stat=info)
#endif
deallocate(ias,jas,vs, stat=info)
else if (.not.use_buffers) then
call psi_msort_up(nzin,ja(1:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP private(i,j,first_idx,last_idx,nzl,act_col, &
!$OMP iret,ithread,work,first_elem,last_elem)
!$OMP SINGLE
nthreads = omp_get_num_threads()
!$OMP END SINGLE
ithread = omp_get_thread_num()
! -------- thread-specific workload --------
work = nc/nthreads
if (ithread < MOD(nc,nthreads)) then
work = work + 1
first_idx = ithread*work + 1
else
first_idx = ithread*work + MOD(nc,nthreads) + 1
end if
last_idx = first_idx + work - 1
! ---------------------------------------------------
first_elem = 0
last_elem = -1
act_col = first_idx
do j=1,nzin
if (ja(j) < act_col) then
cycle
else if ((ja(j) > last_idx) .or. (work < 1)) then
exit
else if (ja(j) > act_col) then
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ia(first_elem:),iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nr+2))+1:(ithread*(nr+2))+nzl+2))
end if
act_col = act_col + 1
first_elem = 0
last_elem = -1
else
if (first_elem == 0) then
first_elem = j
end if
last_elem = j
end if
end do
!$OMP END PARALLEL
#else
i = 1
j = i
do while (i <= nzin)
do while ((ja(j) == ja(i)))
j = j+1
if (j > nzin) exit
enddo
nzl = j - i
call psi_msort_up(nzl,ia(i:),iaux(1:),iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(i:i+nzl-1),&
& ia(i:i+nzl-1),ja(i:i+nzl-1),iaux)
i = j
enddo
#endif
i = 1
irw = ia(i)
icl = ja(i)
j = 1
select case(dupl)
case(psb_dupl_ovwrt_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_add_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
val(i) = val(i) + val(j)
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case(psb_dupl_err_)
do
j = j + 1
if (j > nzin) exit
if ((ia(j) < 1 .or. ia(j) > nr) .or. (ja(j) < 1 .or. ja(j) > nc)) cycle
if ((ia(j) == irw).and.(ja(j) == icl)) then
call psb_errpush(psb_err_duplicate_coo,name)
goto 9999
else
i = i+1
val(i) = val(j)
ia(i) = ia(j)
ja(i) = ja(j)
irw = ia(i)
icl = ja(i)
endif
enddo
case default
write(psb_err_unit,*) 'Error in fix_coo: unsafe dupl',dupl
info =-7
end select
nzout = i
if (debug_level >= psb_debug_serial_)&
& write(debug_unit,*) trim(name),': end second loop'
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_fix_coo_inner_colmajor
subroutine psb_z_cp_coo_to_lcoo(a,b,info) subroutine psb_z_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
@ -8347,3 +7597,4 @@ subroutine psb_lz_cp_coo_from_icoo(a,b,info)
return return
end subroutine psb_lz_cp_coo_from_icoo end subroutine psb_lz_cp_coo_from_icoo

Loading…
Cancel
Save