|
|
@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
!locals
|
|
|
|
!locals
|
|
|
|
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
|
|
|
|
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
|
|
|
|
complex(psb_spk_), allocatable :: vs(:)
|
|
|
|
complex(psb_spk_), allocatable :: vs(:)
|
|
|
|
integer(psb_ipk_) :: nza, nzl,iret
|
|
|
|
integer(psb_ipk_) :: nza, nzl,iret, maxnr
|
|
|
|
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
|
|
|
|
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
character(len=20) :: name = 'psb_fixcoo'
|
|
|
|
character(len=20) :: name = 'psb_fixcoo'
|
|
|
@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
#if defined(OPENMP)
|
|
|
|
#if defined(OPENMP)
|
|
|
|
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
|
|
|
|
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
|
|
|
|
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
|
|
|
|
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
|
|
|
|
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
|
|
|
|
integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
info = psb_success_
|
|
|
|
info = psb_success_
|
|
|
@ -4302,10 +4302,13 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
if (use_buffers) then
|
|
|
|
if (use_buffers) then
|
|
|
|
iaux(:) = 0
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
!$omp workshare
|
|
|
|
|
|
|
|
iaux(:) = 0
|
|
|
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
|
|
|
maxnr = 0
|
|
|
|
!$OMP PARALLEL DO default(none) schedule(STATIC) &
|
|
|
|
!$OMP PARALLEL DO default(none) schedule(STATIC) &
|
|
|
|
!$OMP shared(nzin,ia,nr,iaux) &
|
|
|
|
!$OMP shared(nzin,ia,nr,iaux,maxnr) &
|
|
|
|
!$OMP private(i) &
|
|
|
|
!$OMP private(i) &
|
|
|
|
!$OMP reduction(.and.:use_buffers)
|
|
|
|
!$OMP reduction(.and.:use_buffers)
|
|
|
|
do i=1,nzin
|
|
|
|
do i=1,nzin
|
|
|
@ -4319,7 +4322,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
!$OMP END PARALLEL DO
|
|
|
|
!$OMP END PARALLEL DO
|
|
|
|
|
|
|
|
maxnr = maxval(iaux(1:nr))
|
|
|
|
#else
|
|
|
|
#else
|
|
|
|
|
|
|
|
iaux(:) = 0
|
|
|
|
!srt_inp = .true.
|
|
|
|
!srt_inp = .true.
|
|
|
|
do i=1,nzin
|
|
|
|
do i=1,nzin
|
|
|
|
if ((ia(i) < 1).or.(ia(i) > nr)) then
|
|
|
|
if ((ia(i) < 1).or.(ia(i) > nr)) then
|
|
|
@ -4342,22 +4347,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
if (use_buffers) then
|
|
|
|
if (use_buffers) then
|
|
|
|
#if defined(OPENMP)
|
|
|
|
#if defined(OPENMP)
|
|
|
|
maxthreads = omp_get_max_threads()
|
|
|
|
maxthreads = omp_get_max_threads()
|
|
|
|
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info)
|
|
|
|
allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
|
|
|
|
if (info /= psb_success_) then
|
|
|
|
if (info /= psb_success_) then
|
|
|
|
info = psb_err_alloc_dealloc_
|
|
|
|
info = psb_err_alloc_dealloc_
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!$omp workshare
|
|
|
|
kaux(:) = 0
|
|
|
|
kaux(:) = 0
|
|
|
|
sum(:) = 0
|
|
|
|
!$omp end workshare
|
|
|
|
sum(1) = 1
|
|
|
|
|
|
|
|
err = 0
|
|
|
|
err = 0
|
|
|
|
|
|
|
|
|
|
|
|
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
|
|
|
|
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
|
|
|
|
! index for each row. We do the same on 'kaux'
|
|
|
|
! index for each row. We do the same on 'kaux'
|
|
|
|
!$OMP PARALLEL default(none) &
|
|
|
|
!$OMP PARALLEL default(none) &
|
|
|
|
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
|
|
|
|
!$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
|
|
|
|
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
|
|
|
|
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
|
|
|
|
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
|
|
|
|
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
|
|
|
|
|
|
|
|
|
|
|
@ -4382,60 +4386,67 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
!write(0,*) 'fix_coo_inner: trying with exscan'
|
|
|
|
!write(0,*) 'fix_coo_inner: trying with exscan'
|
|
|
|
call psi_exscan(nr+1,iaux,info,shift=ione)
|
|
|
|
call psi_exscan(nr+1,iaux,info,shift=ione)
|
|
|
|
!$OMP BARRIER
|
|
|
|
!$OMP BARRIER
|
|
|
|
!$OMP SINGLE
|
|
|
|
|
|
|
|
!t0 = omp_get_wtime()
|
|
|
|
|
|
|
|
!$OMP END SINGLE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! ------------------ Sorting and buffers -------------------
|
|
|
|
! ------------------ Sorting and buffers -------------------
|
|
|
|
|
|
|
|
|
|
|
|
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
|
|
|
|
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
|
|
|
|
! unmodified 'iaux'
|
|
|
|
! unmodified 'iaux'
|
|
|
|
do j=idxstart,idxend
|
|
|
|
|
|
|
|
|
|
|
|
!$omp do private(j)
|
|
|
|
|
|
|
|
do j=1,nr+1
|
|
|
|
idxaux(j) = iaux(j)
|
|
|
|
idxaux(j) = iaux(j)
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
|
|
|
|
!$omp end do
|
|
|
|
|
|
|
|
|
|
|
|
! Here we sort data inside the auxiliary buffers
|
|
|
|
! Here we sort data inside the auxiliary buffers
|
|
|
|
|
|
|
|
!$omp do private(act_row,i,i1)
|
|
|
|
do i=1,nzin
|
|
|
|
do i=1,nzin
|
|
|
|
act_row = ia(i)
|
|
|
|
act_row = ia(i)
|
|
|
|
if ((act_row >= idxstart) .and. (act_row <= idxend)) then
|
|
|
|
!$omp atomic capture
|
|
|
|
ias(idxaux(act_row)) = ia(i)
|
|
|
|
i1 =idxaux(act_row)
|
|
|
|
jas(idxaux(act_row)) = ja(i)
|
|
|
|
idxaux(act_row) = idxaux(act_row) + 1
|
|
|
|
vs(idxaux(act_row)) = val(i)
|
|
|
|
!$omp end atomic
|
|
|
|
idxaux(act_row) = idxaux(act_row) + 1
|
|
|
|
ias(i1) = ia(i)
|
|
|
|
end if
|
|
|
|
jas(i1) = ja(i)
|
|
|
|
|
|
|
|
vs(i1) = val(i)
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
|
|
|
|
!$omp end do
|
|
|
|
|
|
|
|
|
|
|
|
!$OMP BARRIER
|
|
|
|
|
|
|
|
!$OMP SINGLE
|
|
|
|
|
|
|
|
!t1 = omp_get_wtime()
|
|
|
|
|
|
|
|
!write(0,*) ithread,'Srt&Cpy :',t1-t0
|
|
|
|
|
|
|
|
!$OMP END SINGLE
|
|
|
|
|
|
|
|
! Let's sort column indices and values. After that we will store
|
|
|
|
! Let's sort column indices and values. After that we will store
|
|
|
|
! the number of unique values in 'kaux'
|
|
|
|
! the number of unique values in 'kaux'
|
|
|
|
do j=idxstart,idxend
|
|
|
|
block
|
|
|
|
first_elem = iaux(j)
|
|
|
|
integer(psb_ipk_), allocatable :: ixt(:)
|
|
|
|
last_elem = iaux(j+1) - 1
|
|
|
|
allocate(ixt(maxnr+2))
|
|
|
|
nzl = last_elem - first_elem + 1
|
|
|
|
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
|
|
|
|
|
|
|
|
do j=1,nr
|
|
|
|
! The row has elements?
|
|
|
|
first_elem = iaux(j)
|
|
|
|
if (nzl > 0) then
|
|
|
|
last_elem = iaux(j+1) - 1
|
|
|
|
call psi_msort_up(nzl,jas(first_elem:last_elem), &
|
|
|
|
nzl = last_elem - first_elem + 1
|
|
|
|
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
|
|
|
|
|
|
|
|
if (iret == 0) then
|
|
|
|
|
|
|
|
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
|
|
|
|
|
|
|
|
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
|
|
|
|
|
|
|
|
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! Over each row we count the unique values
|
|
|
|
! The row has elements?
|
|
|
|
kaux(j) = 1
|
|
|
|
if (nzl > 0) then
|
|
|
|
do i=first_elem+1,last_elem
|
|
|
|
call psi_msort_up(nzl,jas(first_elem:last_elem), &
|
|
|
|
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
|
|
|
|
& ixt,iret)
|
|
|
|
cycle
|
|
|
|
if (iret == 0) then
|
|
|
|
|
|
|
|
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
|
|
|
|
|
|
|
|
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
|
|
|
|
|
|
|
|
& ixt)
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
kaux(j) = kaux(j) + 1
|
|
|
|
|
|
|
|
end do
|
|
|
|
! Over each row we count the unique values
|
|
|
|
end if
|
|
|
|
kaux(j) = 1
|
|
|
|
end do
|
|
|
|
do i=first_elem+1,last_elem
|
|
|
|
|
|
|
|
if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then
|
|
|
|
|
|
|
|
cycle
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
kaux(j) = kaux(j) + 1
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
!$omp end do
|
|
|
|
|
|
|
|
deallocate(ixt)
|
|
|
|
|
|
|
|
end block
|
|
|
|
|
|
|
|
|
|
|
|
! --------------------------------------------------
|
|
|
|
! --------------------------------------------------
|
|
|
|
! ---------------- kaux composition ----------------
|
|
|
|
! ---------------- kaux composition ----------------
|
|
|
@ -4553,7 +4564,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
|
|
|
|
|
|
|
|
|
|
|
|
nzout = kaux(nr+1) - 1
|
|
|
|
nzout = kaux(nr+1) - 1
|
|
|
|
|
|
|
|
|
|
|
|
deallocate(sum,kaux,idxaux,stat=info)
|
|
|
|
deallocate(kaux,idxaux,stat=info)
|
|
|
|
#else
|
|
|
|
#else
|
|
|
|
!if (.not.srt_inp) then
|
|
|
|
!if (.not.srt_inp) then
|
|
|
|
|
|
|
|
|
|
|
|