Swicth csr_impl to F90

omp-threadsafe
Salvatore Filippone 2 years ago
parent db0577cd07
commit afdbac6727

@ -2837,6 +2837,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
use psb_realloc_mod use psb_realloc_mod
use psb_c_base_mat_mod use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_c_csr_sparse_mat), intent(inout) :: a class(psb_c_csr_sparse_mat), intent(inout) :: a
@ -2851,13 +2854,11 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='c_cp_csr_from_coo' character(len=20) :: name='c_cp_csr_from_coo'
logical :: use_openmp = .false. #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads #endif
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2902,94 +2903,93 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$ maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1)) allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()
end subroutine psb_c_cp_csr_from_coo end subroutine psb_c_cp_csr_from_coo
@ -3089,6 +3089,9 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_c_base_mat_mod use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_c_csr_sparse_mat), intent(inout) :: a class(psb_c_csr_sparse_mat), intent(inout) :: a
@ -3102,12 +3105,12 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:) #if defined(OPENMP)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ use_openmp = .true. integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_ info = psb_success_
@ -3135,74 +3138,74 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1)) allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3217,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()

@ -2837,6 +2837,9 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
use psb_realloc_mod use psb_realloc_mod
use psb_d_base_mat_mod use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csr_from_coo use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_d_csr_sparse_mat), intent(inout) :: a class(psb_d_csr_sparse_mat), intent(inout) :: a
@ -2851,13 +2854,11 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='d_cp_csr_from_coo' character(len=20) :: name='d_cp_csr_from_coo'
logical :: use_openmp = .false. #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads #endif
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2902,94 +2903,93 @@ subroutine psb_d_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$ maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1)) allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()
end subroutine psb_d_cp_csr_from_coo end subroutine psb_d_cp_csr_from_coo
@ -3089,6 +3089,9 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_d_base_mat_mod use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_csr_from_coo use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_d_csr_sparse_mat), intent(inout) :: a class(psb_d_csr_sparse_mat), intent(inout) :: a
@ -3102,12 +3105,12 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:) #if defined(OPENMP)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ use_openmp = .true. integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_ info = psb_success_
@ -3135,74 +3138,74 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1)) allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3217,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()

@ -2837,6 +2837,9 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
use psb_realloc_mod use psb_realloc_mod
use psb_s_base_mat_mod use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csr_from_coo use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_s_csr_sparse_mat), intent(inout) :: a class(psb_s_csr_sparse_mat), intent(inout) :: a
@ -2851,13 +2854,11 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='s_cp_csr_from_coo' character(len=20) :: name='s_cp_csr_from_coo'
logical :: use_openmp = .false. #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads #endif
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2902,94 +2903,93 @@ subroutine psb_s_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$ maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1)) allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()
end subroutine psb_s_cp_csr_from_coo end subroutine psb_s_cp_csr_from_coo
@ -3089,6 +3089,9 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_s_base_mat_mod use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_csr_from_coo use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_s_csr_sparse_mat), intent(inout) :: a class(psb_s_csr_sparse_mat), intent(inout) :: a
@ -3102,12 +3105,12 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:) #if defined(OPENMP)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ use_openmp = .true. integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_ info = psb_success_
@ -3135,74 +3138,74 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1)) allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3217,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()

@ -2837,6 +2837,9 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
use psb_realloc_mod use psb_realloc_mod
use psb_z_base_mat_mod use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csr_from_coo use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_z_csr_sparse_mat), intent(inout) :: a class(psb_z_csr_sparse_mat), intent(inout) :: a
@ -2851,13 +2854,11 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='z_cp_csr_from_coo' character(len=20) :: name='z_cp_csr_from_coo'
logical :: use_openmp = .false. #if defined(OPENMP)
integer(psb_ipk_), allocatable :: sum(:)
!$ integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads #endif
!$ use_openmp = .true.
info = psb_success_ info = psb_success_
debug_unit = psb_get_debug_unit() debug_unit = psb_get_debug_unit()
@ -2902,94 +2903,93 @@ subroutine psb_z_cp_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$ maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
!!$ !$ allocate(sum(maxthreads+1)) allocate(sum(maxthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
end do end do
ip = 1 ip = 1
do i=1,nr do i=1,nr
ncl = a%irp(i) ncl = a%irp(i)
a%irp(i) = ip a%irp(i) = ip
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()
end subroutine psb_z_cp_csr_from_coo end subroutine psb_z_cp_csr_from_coo
@ -3089,6 +3089,9 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_z_base_mat_mod use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_csr_from_coo use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_csr_from_coo
#if defined(OPENMP)
use omp_lib
#endif
implicit none implicit none
class(psb_z_csr_sparse_mat), intent(inout) :: a class(psb_z_csr_sparse_mat), intent(inout) :: a
@ -3102,12 +3105,12 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_), Parameter :: maxtry=8
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name='mv_from_coo' character(len=20) :: name='mv_from_coo'
logical :: use_openmp = .false.
! $ integer(psb_ipk_), allocatable :: sum(:) #if defined(OPENMP)
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s integer(psb_ipk_), allocatable :: sum(:)
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
! $ use_openmp = .true. integer(psb_ipk_) :: nxt_val,old_val,saved_elem
#endif
info = psb_success_ info = psb_success_
@ -3135,74 +3138,74 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
a%irp(:) = 0 a%irp(:) = 0
!!$ if (use_openmp) then #if defined(OPENMP)
!!$ !$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
!!$
!!$ !$OMP DO schedule(STATIC) & !$OMP DO schedule(STATIC) &
!!$ !$OMP private(k,i) !$OMP private(k,i)
!!$ do k=1,nza do k=1,nza
!!$ i = itemp(k) i = itemp(k)
!!$ a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
!!$ end do end do
!!$ !$OMP END DO !$OMP END DO
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ nthreads = omp_get_num_threads() nthreads = omp_get_num_threads()
!!$ !$ allocate(sum(nthreads+1)) allocate(sum(nthreads+1))
!!$ !$ sum(:) = 0 sum(:) = 0
!!$ !$ sum(1) = 1 sum(1) = 1
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ ithread = omp_get_thread_num() ithread = omp_get_thread_num()
!!$
!!$ !$ work = nr/nthreads work = nr/nthreads
!!$ !$ if (ithread < MOD(nr,nthreads)) then if (ithread < MOD(nr,nthreads)) then
!!$ !$ work = work + 1 work = work + 1
!!$ !$ first_idx = ithread*work + 1 first_idx = ithread*work + 1
!!$ !$ else else
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 first_idx = ithread*work + MOD(nr,nthreads) + 1
!!$ !$ end if end if
!!$
!!$ !$ last_idx = first_idx + work - 1 last_idx = first_idx + work - 1
!!$
!!$ !$ s = 0 s = 0
!!$ !$ do i=first_idx,last_idx do i=first_idx,last_idx
!!$ !$ s = s + a%irp(i) s = s + a%irp(i)
!!$ !$ end do end do
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ sum(ithread+2) = s sum(ithread+2) = s
!!$ !$ end if end if
!!$
!!$ !$OMP BARRIER !$OMP BARRIER
!!$
!!$ !$OMP SINGLE !$OMP SINGLE
!!$ !$ do i=2,nthreads+1 do i=2,nthreads+1
!!$ !$ sum(i) = sum(i) + sum(i-1) sum(i) = sum(i) + sum(i-1)
!!$ !$ end do end do
!!$ !$OMP END SINGLE !$OMP END SINGLE
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ saved_elem = a%irp(first_idx) saved_elem = a%irp(first_idx)
!!$ !$ end if end if
!!$ !$ if (ithread == 0) then if (ithread == 0) then
!!$ !$ a%irp(1) = 1 a%irp(1) = 1
!!$ !$ end if end if
!!$
!!$ !$ if (work > 0) then if (work > 0) then
!!$ !$ old_val = a%irp(first_idx+1) old_val = a%irp(first_idx+1)
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) a%irp(first_idx+1) = saved_elem + sum(ithread+1)
!!$ !$ end if end if
!!$
!!$ !$ do i=first_idx+2,last_idx+1 do i=first_idx+2,last_idx+1
!!$ !$ nxt_val = a%irp(i) nxt_val = a%irp(i)
!!$ !$ a%irp(i) = a%irp(i-1) + old_val a%irp(i) = a%irp(i-1) + old_val
!!$ !$ old_val = nxt_val old_val = nxt_val
!!$ !$ end do end do
!!$
!!$ !$OMP END PARALLEL !$OMP END PARALLEL
!!$ else #else
do k=1,nza do k=1,nza
i = itemp(k) i = itemp(k)
a%irp(i) = a%irp(i) + 1 a%irp(i) = a%irp(i) + 1
@ -3214,7 +3217,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info)
ip = ip + ncl ip = ip + ncl
end do end do
a%irp(nr+1) = ip a%irp(nr+1) = ip
!!$ end if #endif
call a%set_host() call a%set_host()

@ -8,7 +8,7 @@ CSR Storage format for matrix A: CSR COO
0100 MAXIT 0100 MAXIT
05 ITRACE 05 ITRACE
002 IRST restart for RGMRES and BiCGSTABL 002 IRST restart for RGMRES and BiCGSTABL
ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH
NONE If ILU : MILU or NONE othewise ignored NONE If ILU : MILU or NONE othewise ignored
NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored
0 Level of fill for forward factorization 0 Level of fill for forward factorization

Loading…
Cancel
Save