Fixed silly bug in EXSCAN and usage in CSR_IMPL

omp-threadsafe
sfilippone 3 years ago
parent f3efea0a89
commit 74a8217520

@ -2931,7 +2931,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
@ -3099,7 +3099,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO

@ -2931,7 +2931,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
@ -3099,7 +3099,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO

@ -2931,7 +2931,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
@ -3099,7 +3099,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO

@ -2931,7 +2931,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO
@ -3099,7 +3099,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
do k=1,nza
i = itemp(k)
!$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
a%irp(i) = a%irp(i) + 1
!$OMP END ATOMIC
end do
!$OMP END DO

@ -43,6 +43,7 @@ subroutine psi_c_exscanv(n,x,info,shift)
complex(psb_spk_), intent(in), optional :: shift
complex(psb_spk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
complex(psb_spk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
complex(psb_spk_), pointer :: sumb(:)
complex(psb_spk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = czero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_c_exscan

@ -43,6 +43,7 @@ subroutine psi_d_exscanv(n,x,info,shift)
real(psb_dpk_), intent(in), optional :: shift
real(psb_dpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
real(psb_dpk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
real(psb_dpk_), pointer :: sumb(:)
real(psb_dpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = dzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_d_exscan

@ -43,6 +43,7 @@ subroutine psi_e_exscanv(n,x,info,shift)
integer(psb_epk_), intent(in), optional :: shift
integer(psb_epk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
integer(psb_epk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_epk_), pointer :: sumb(:)
integer(psb_epk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = ezero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_e_exscan

@ -43,6 +43,7 @@ subroutine psi_i2_exscanv(n,x,info,shift)
integer(psb_i2pk_), intent(in), optional :: shift
integer(psb_i2pk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
integer(psb_i2pk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_i2pk_), pointer :: sumb(:)
integer(psb_i2pk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = i2zero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_i2_exscan

@ -43,6 +43,7 @@ subroutine psi_m_exscanv(n,x,info,shift)
integer(psb_mpk_), intent(in), optional :: shift
integer(psb_mpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
integer(psb_mpk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
integer(psb_mpk_), pointer :: sumb(:)
integer(psb_mpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = mzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_m_exscan

@ -43,6 +43,7 @@ subroutine psi_s_exscanv(n,x,info,shift)
real(psb_spk_), intent(in), optional :: shift
real(psb_spk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
real(psb_spk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
real(psb_spk_), pointer :: sumb(:)
real(psb_spk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = szero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_s_exscan

@ -43,6 +43,7 @@ subroutine psi_z_exscanv(n,x,info,shift)
complex(psb_dpk_), intent(in), optional :: shift
complex(psb_dpk_) :: shift_, tp, ts
integer(psb_ipk_) :: i
logical is_nested, is_parallel
if (present(shift)) then
@ -76,9 +77,11 @@ contains
! a pointer. The semantics of COPYPRIVATE is that the POINTER is copied
! so effectively we are recovering a SHARED SUMB which is what
! we need in this case. If it was an ALLOCATABLE, then it would be the contents
! that would get copied, and the SHARED effect would no longer be there.
integer(psb_ipk_) :: i,ithread,nthreads,first_idx,last_idx,wrk
complex(psb_dpk_), pointer :: sumb(:)
! that would get copied, and the SHARED effect would no longer be there.
! Simple parallel version of EXSCAN
integer(psb_ipk_) :: i,ithread,nthreads,idxstart,idxend,wrk
complex(psb_dpk_), pointer :: sumb(:)
complex(psb_dpk_) :: tp, ts
nthreads = omp_get_num_threads()
ithread = omp_get_thread_num()
@ -90,19 +93,21 @@ contains
wrk = (n)/nthreads
if (ithread < MOD((n),nthreads)) then
wrk = wrk + 1
first_idx = ithread*wrk + 1
idxstart = ithread*wrk + 1
else
first_idx = ithread*wrk + MOD((n),nthreads) + 1
idxstart = ithread*wrk + MOD((n),nthreads) + 1
end if
last_idx = min(first_idx + wrk - 1,n )
if (first_idx<=last_idx) then
sumb(ithread+2) = sumb(ithread+2) + x(first_idx)
do i=first_idx+1,last_idx
sumb(ithread+2) = sumb(ithread+2) + x(i)
x(i) = x(i)+x(i-1)
idxend = min(idxstart + wrk - 1,n )
tp = zzero
if (idxstart<=idxend) then
do i=idxstart,idxend
ts = x(i)
x(i) = tp
tp = tp + ts
end do
end if
sumb(ithread+2) = tp
!$OMP BARRIER
!$OMP SINGLE
@ -115,11 +120,10 @@ contains
!$OMP DO SCHEDULE(STATIC)
do i=1,n
x(i) = sumb(ithread+1) + x(i) + shift_
x(i) = x(i) + sumb(ithread+1) + shift_
end do
!$OMP END DO
!$OMP SINGLE
x(1) = shift_
deallocate(sumb)
!$OMP END SINGLE
end subroutine inner_z_exscan

Loading…
Cancel
Save