Fix COO fix_coo_inner_rowmajor not to overflow on integers.

repack-csga
sfilippone 8 months ago
parent a613e963db
commit ecccb13914

@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_spk_), allocatable :: vs(:) complex(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4302,10 +4302,13 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
if (use_buffers) then if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnr = maxval(iaux(1:nr))
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4342,22 +4347,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4386,40 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(maxnr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4427,11 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4444,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4564,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then

@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_dpk_), allocatable :: vs(:) real(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4302,10 +4302,13 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
if (use_buffers) then if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnr = maxval(iaux(1:nr))
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4342,22 +4347,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4386,40 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(maxnr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4427,11 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4444,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4564,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then

@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_spk_), allocatable :: vs(:) real(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4302,10 +4302,13 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
if (use_buffers) then if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnr = maxval(iaux(1:nr))
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4342,22 +4347,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4386,40 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(maxnr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4427,11 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4444,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4564,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then

@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_dpk_), allocatable :: vs(:) complex(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4302,10 +4302,13 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
if (use_buffers) then if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnr = maxval(iaux(1:nr))
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4342,22 +4347,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4386,40 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(maxnr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4427,11 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4444,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4564,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then

Loading…
Cancel
Save