Fix fix_coo for OpenMP

omp-threadsafe
Salvatore Filippone 3 years ago
parent e2ee8d8c07
commit db0577cd07

File diff suppressed because it is too large Load Diff

@ -2902,78 +2902,78 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$ maxthreads = omp_get_max_threads() !!$ !$ maxthreads = omp_get_max_threads()
!$ allocate(sum(maxthreads+1)) !!$ !$ allocate(sum(maxthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!!$
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
@ -2986,7 +2986,7 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()
@ -3104,10 +3104,10 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false. logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:) ! $ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
!$ use_openmp = .true. ! $ use_openmp = .true.
info = psb_success_ info = psb_success_
@ -3135,74 +3135,74 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$ allocate(sum(nthreads+1)) !!$ !$ allocate(sum(nthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3214,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()

File diff suppressed because it is too large Load Diff

@ -2902,78 +2902,78 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$ maxthreads = omp_get_max_threads() !!$ !$ maxthreads = omp_get_max_threads()
!$ allocate(sum(maxthreads+1)) !!$ !$ allocate(sum(maxthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!!$
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
@ -2986,7 +2986,7 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()
@ -3104,10 +3104,10 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false. logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:) ! $ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
!$ use_openmp = .true. ! $ use_openmp = .true.
info = psb_success_ info = psb_success_
@ -3135,74 +3135,74 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$ allocate(sum(nthreads+1)) !!$ !$ allocate(sum(nthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3214,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()

File diff suppressed because it is too large Load Diff

@ -2902,78 +2902,78 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$ maxthreads = omp_get_max_threads() !!$ !$ maxthreads = omp_get_max_threads()
!$ allocate(sum(maxthreads+1)) !!$ !$ allocate(sum(maxthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!!$
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
@ -2986,7 +2986,7 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()
@ -3104,10 +3104,10 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false. logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:) ! $ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
!$ use_openmp = .true. ! $ use_openmp = .true.
info = psb_success_ info = psb_success_
@ -3135,74 +3135,74 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$ allocate(sum(nthreads+1)) !!$ !$ allocate(sum(nthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3214,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()

File diff suppressed because it is too large Load Diff

@ -2902,78 +2902,78 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$ maxthreads = omp_get_max_threads() !!$ !$ maxthreads = omp_get_max_threads()
!$ allocate(sum(maxthreads+1)) !!$ !$ allocate(sum(maxthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!!$
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(nza,itemp,a,nthreads,sum,nr) & !!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
@ -2986,7 +2986,7 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()
@ -3104,10 +3104,10 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false. logical :: use_openmp = .false.
!$ integer(psb_ipk_), allocatable :: sum(:) ! $ integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
!$ use_openmp = .true. ! $ use_openmp = .true.
info = psb_success_ info = psb_success_
@ -3135,74 +3135,74 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
if (use_openmp) then !!$ if (use_openmp) then
!$OMP PARALLEL default(none) & !!$ !$OMP PARALLEL default(none) &
!$OMP shared(sum,nthreads,nr,a,itemp,nza) & !!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!$OMP DO schedule(STATIC) & !!$ !$OMP DO schedule(STATIC) &
!$OMP private(k,i) !!$ !$OMP private(k,i)
do k=1,nza !!$ do k=1,nza
i = itemp(k) !!$ i = itemp(k)
a%irp(i) = a%irp(i) + 1 !!$ a%irp(i) = a%irp(i) + 1
end do !!$ end do
!$OMP END DO !!$ !$OMP END DO
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ nthreads = omp_get_num_threads() !!$ !$ nthreads = omp_get_num_threads()
!$ allocate(sum(nthreads+1)) !!$ !$ allocate(sum(nthreads+1))
!$ sum(:) = 0 !!$ !$ sum(:) = 0
!$ sum(1) = 1 !!$ !$ sum(1) = 1
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ ithread = omp_get_thread_num() !!$ !$ ithread = omp_get_thread_num()
!!$
!$ work = nr/nthreads !!$ !$ work = nr/nthreads
!$ if (ithread < MOD(nr,nthreads)) then !!$ !$ if (ithread < MOD(nr,nthreads)) then
!$ work = work + 1 !!$ !$ work = work + 1
!$ first_idx = ithread*work + 1 !!$ !$ first_idx = ithread*work + 1
!$ else !!$ !$ else
!$ first_idx = ithread*work + MOD(nr,nthreads) + 1 !!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
!$ end if !!$ !$ end if
!!$
!$ last_idx = first_idx + work - 1 !!$ !$ last_idx = first_idx + work - 1
!!$
!$ s = 0 !!$ !$ s = 0
!$ do i=first_idx,last_idx !!$ !$ do i=first_idx,last_idx
!$ s = s + a%irp(i) !!$ !$ s = s + a%irp(i)
!$ end do !!$ !$ end do
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ sum(ithread+2) = s !!$ !$ sum(ithread+2) = s
!$ end if !!$ !$ end if
!!$
!$OMP BARRIER !!$ !$OMP BARRIER
!!$
!$OMP SINGLE !!$ !$OMP SINGLE
!$ do i=2,nthreads+1 !!$ !$ do i=2,nthreads+1
!$ sum(i) = sum(i) + sum(i-1) !!$ !$ sum(i) = sum(i) + sum(i-1)
!$ end do !!$ !$ end do
!$OMP END SINGLE !!$ !$OMP END SINGLE
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ saved_elem = a%irp(first_idx) !!$ !$ saved_elem = a%irp(first_idx)
!$ end if !!$ !$ end if
!$ if (ithread == 0) then !!$ !$ if (ithread == 0) then
!$ a%irp(1) = 1 !!$ !$ a%irp(1) = 1
!$ end if !!$ !$ end if
!!$
!$ if (work > 0) then !!$ !$ if (work > 0) then
!$ old_val = a%irp(first_idx+1) !!$ !$ old_val = a%irp(first_idx+1)
!$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) !!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!$ end if !!$ !$ end if
!!$
!$ do i=first_idx+2,last_idx+1 !!$ !$ do i=first_idx+2,last_idx+1
!$ nxt_val = a%irp(i) !!$ !$ nxt_val = a%irp(i)
!$ a%irp(i) = a%irp(i-1) + old_val !!$ !$ a%irp(i) = a%irp(i-1) + old_val
!$ old_val = nxt_val !!$ !$ old_val = nxt_val
!$ end do !!$ !$ end do
!!$
!$OMP END PARALLEL !!$ !$OMP END PARALLEL
else !!$ else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3214,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
end if !!$ end if
call a%set_host() call a%set_host()

Loading…
Cancel
Save