Fixed CSR mv and cp _from_coo with OpenMP.

omp-threadsafe
sfilippone 2 years ago
parent 6ba7d93159
commit dbd55321f8

@ -2877,7 +2877,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
logical :: use_openmp = .false. logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif #endif
@ -2923,81 +2923,74 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
endif endif
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(sum(maxthreads+1))
sum(:) = 0
sum(1) = 1
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
!$OMP BARRIER !$OMP BARRIER
if (work > 0) then !$OMP DO SCHEDULE(STATIC)
old_val = a%irp(first_idx+1) do i=1,nr+1
a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(i) = suma(ithread+1) + a%irp(i) +1
end if
do i=first_idx+2,last_idx+1
nxt_val = a%irp(i)
a%irp(i) = a%irp(i-1) + old_val
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
a%irp(:) = 0
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3010,6 +3003,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_c_cp_csr_from_coo end subroutine psb_c_cp_csr_from_coo
@ -3129,7 +3123,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif #endif
@ -3156,90 +3150,88 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(sum(nthreads+1)) allocate(suma(nthreads+1))
sum(:) = 0 suma(:) = 0
sum(1) = 1 !suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then !$OMP BARRIER
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
if (work > 0) then
old_val = a%irp(first_idx+1)
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1 !$OMP DO SCHEDULE(STATIC)
nxt_val = a%irp(i) do i=1,nr+1
a%irp(i) = a%irp(i-1) + old_val a%irp(i) = suma(ithread+1) + a%irp(i) +1
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza a%irp(:) = 0
i = itemp(k) do k=1,nza
a%irp(i) = a%irp(i) + 1 i = itemp(k)
end do a%irp(i) = a%irp(i) + 1
ip = 1 end do
do i=1,nr ip = 1
ncl = a%irp(i) do i=1,nr
a%irp(i) = ip ncl = a%irp(i)
ip = ip + ncl a%irp(i) = ip
end do ip = ip + ncl
a%irp(nr+1) = ip end do
a%irp(nr+1) = ip
#endif #endif
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)
call a%set_host() call a%set_host()
end subroutine psb_c_mv_csr_from_coo end subroutine psb_c_mv_csr_from_coo

@ -2877,7 +2877,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
logical :: use_openmp = .false. logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif #endif
@ -2923,81 +2923,74 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
endif endif
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(sum(maxthreads+1))
sum(:) = 0
sum(1) = 1
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
!$OMP BARRIER !$OMP BARRIER
if (work > 0) then !$OMP DO SCHEDULE(STATIC)
old_val = a%irp(first_idx+1) do i=1,nr+1
a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(i) = suma(ithread+1) + a%irp(i) +1
end if
do i=first_idx+2,last_idx+1
nxt_val = a%irp(i)
a%irp(i) = a%irp(i-1) + old_val
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
a%irp(:) = 0
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3010,6 +3003,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_d_cp_csr_from_coo end subroutine psb_d_cp_csr_from_coo
@ -3129,7 +3123,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif #endif
@ -3156,90 +3150,88 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(sum(nthreads+1)) allocate(suma(nthreads+1))
sum(:) = 0 suma(:) = 0
sum(1) = 1 !suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then !$OMP BARRIER
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
if (work > 0) then
old_val = a%irp(first_idx+1)
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1 !$OMP DO SCHEDULE(STATIC)
nxt_val = a%irp(i) do i=1,nr+1
a%irp(i) = a%irp(i-1) + old_val a%irp(i) = suma(ithread+1) + a%irp(i) +1
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza a%irp(:) = 0
i = itemp(k) do k=1,nza
a%irp(i) = a%irp(i) + 1 i = itemp(k)
end do a%irp(i) = a%irp(i) + 1
ip = 1 end do
do i=1,nr ip = 1
ncl = a%irp(i) do i=1,nr
a%irp(i) = ip ncl = a%irp(i)
ip = ip + ncl a%irp(i) = ip
end do ip = ip + ncl
a%irp(nr+1) = ip end do
a%irp(nr+1) = ip
#endif #endif
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)
call a%set_host() call a%set_host()
end subroutine psb_d_mv_csr_from_coo end subroutine psb_d_mv_csr_from_coo

@ -2877,7 +2877,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
logical :: use_openmp = .false. logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif #endif
@ -2923,81 +2923,74 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
endif endif
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(sum(maxthreads+1))
sum(:) = 0
sum(1) = 1
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
!$OMP BARRIER !$OMP BARRIER
if (work > 0) then !$OMP DO SCHEDULE(STATIC)
old_val = a%irp(first_idx+1) do i=1,nr+1
a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(i) = suma(ithread+1) + a%irp(i) +1
end if
do i=first_idx+2,last_idx+1
nxt_val = a%irp(i)
a%irp(i) = a%irp(i-1) + old_val
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
a%irp(:) = 0
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3010,6 +3003,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_s_cp_csr_from_coo end subroutine psb_s_cp_csr_from_coo
@ -3129,7 +3123,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif #endif
@ -3156,90 +3150,88 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(sum(nthreads+1)) allocate(suma(nthreads+1))
sum(:) = 0 suma(:) = 0
sum(1) = 1 !suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then !$OMP BARRIER
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
if (work > 0) then
old_val = a%irp(first_idx+1)
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1 !$OMP DO SCHEDULE(STATIC)
nxt_val = a%irp(i) do i=1,nr+1
a%irp(i) = a%irp(i-1) + old_val a%irp(i) = suma(ithread+1) + a%irp(i) +1
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza a%irp(:) = 0
i = itemp(k) do k=1,nza
a%irp(i) = a%irp(i) + 1 i = itemp(k)
end do a%irp(i) = a%irp(i) + 1
ip = 1 end do
do i=1,nr ip = 1
ncl = a%irp(i) do i=1,nr
a%irp(i) = ip ncl = a%irp(i)
ip = ip + ncl a%irp(i) = ip
end do ip = ip + ncl
a%irp(nr+1) = ip end do
a%irp(nr+1) = ip
#endif #endif
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)
call a%set_host() call a%set_host()
end subroutine psb_s_mv_csr_from_coo end subroutine psb_s_mv_csr_from_coo

@ -2877,7 +2877,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
logical :: use_openmp = .false. logical :: use_openmp = .false.
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
#endif #endif
@ -2923,81 +2923,74 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
endif endif
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(sum(maxthreads+1))
sum(:) = 0
sum(1) = 1
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(suma(nthreads+1))
suma(:) = 0
!suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
!$OMP BARRIER !$OMP BARRIER
if (work > 0) then !$OMP DO SCHEDULE(STATIC)
old_val = a%irp(first_idx+1) do i=1,nr+1
a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(i) = suma(ithread+1) + a%irp(i) +1
end if
do i=first_idx+2,last_idx+1
nxt_val = a%irp(i)
a%irp(i) = a%irp(i-1) + old_val
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
a%irp(:) = 0
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3010,6 +3003,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
#endif #endif
call a%set_host() call a%set_host()
end subroutine psb_z_cp_csr_from_coo end subroutine psb_z_cp_csr_from_coo
@ -3129,7 +3123,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_), allocatable :: suma(:)
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif #endif
@ -3156,90 +3150,88 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
call psb_realloc(max(nr+1,nc+1),a%irp,info) call psb_realloc(max(nr+1,nc+1),a%irp,info)
call b%free() call b%free()
a%irp(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(suma,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,i,first_idx,last_idx,s)
!$OMP WORKSHARE
a%irp(:) = 0
!$OMP END WORKSHARE
!$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !$OMP private(k)
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 !$OMP ATOMIC UPDATE
a%irp(i+1) = a%irp(i+1) + 1
!$OMP END ATOMIC
end do end do
!$OMP END DO !$OMP END DO
!$OMP SINGLE !$OMP SINGLE
nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
allocate(sum(nthreads+1)) allocate(suma(nthreads+1))
sum(:) = 0 suma(:) = 0
sum(1) = 1 !suma(1) = 1
!$OMP END SINGLE !$OMP END SINGLE
ithread = omp_get_thread_num() ithread = omp_get_thread_num()
work = nr/nthreads
if (ithread < MOD(nr,nthreads)) then work = (nr+1)/nthreads
if (ithread < MOD((nr+1),nthreads)) then
work = work + 1 work = work + 1
first_idx = ithread*work + 1 first_idx = ithread*work + 1
else else
first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD((nr+1),nthreads) + 1
end if end if
last_idx = first_idx + work - 1 last_idx = min(first_idx + work - 1,nr+1)
s = 0 s = 0
do i=first_idx,last_idx if (first_idx<=last_idx) then
s = s + a%irp(i) suma(ithread+2) = suma(ithread+2) + a%irp(first_idx)
end do do i=first_idx+1,last_idx
if (work > 0) then suma(ithread+2) = suma(ithread+2) + a%irp(i)
sum(ithread+2) = s a%irp(i) = a%irp(i)+a%irp(i-1)
end do
end if end if
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE !$OMP SINGLE
do i=2,nthreads+1 do i=2,nthreads+1
sum(i) = sum(i) + sum(i-1) suma(i) = suma(i) + suma(i-1)
end do end do
!$OMP END SINGLE !$OMP END SINGLE
if (work > 0) then !$OMP BARRIER
saved_elem = a%irp(first_idx)
end if
if (ithread == 0) then
a%irp(1) = 1
end if
if (work > 0) then
old_val = a%irp(first_idx+1)
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
end if
do i=first_idx+2,last_idx+1 !$OMP DO SCHEDULE(STATIC)
nxt_val = a%irp(i) do i=1,nr+1
a%irp(i) = a%irp(i-1) + old_val a%irp(i) = suma(ithread+1) + a%irp(i) +1
old_val = nxt_val
end do end do
!$OMP END DO
!$OMP SINGLE
a%irp(1) = 1
!$OMP END SINGLE
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
do k=1,nza a%irp(:) = 0
i = itemp(k) do k=1,nza
a%irp(i) = a%irp(i) + 1 i = itemp(k)
end do a%irp(i) = a%irp(i) + 1
ip = 1 end do
do i=1,nr ip = 1
ncl = a%irp(i) do i=1,nr
a%irp(i) = ip ncl = a%irp(i)
ip = ip + ncl a%irp(i) = ip
end do ip = ip + ncl
a%irp(nr+1) = ip end do
a%irp(nr+1) = ip
#endif #endif
!write(0,*) name,' Check:',a%irp(nr+1),all(a%irp(1:nr) < a%irp(nr+1))
!write(0,*) name,a%irp(:)
call a%set_host() call a%set_host()
end subroutine psb_z_mv_csr_from_coo end subroutine psb_z_mv_csr_from_coo

@ -639,7 +639,16 @@ contains
write(psb_out_unit,'("-total time : ",es12.5)') ttot write(psb_out_unit,'("-total time : ",es12.5)') ttot
end if end if
call a%print('a.mtx',head='Test') !!$ !$omp parallel
!!$ !$omp master
!!$ block
!!$ character(len=1024) :: fname
!!$ write(fname,'(a,i4.4,a,i4.4,a)') 'a-',iam,'-',np,'.mtx'
!!$ write(0,*) iam,' Size of A ',a%get_nrows(),a%get_ncols(),a%get_nzeros()
!!$ call a%print(fname,head='Test')
!!$ end block
!!$ !$omp end master
!!$ !$omp end parallel
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return

Loading…
Cancel
Save