|
|
@ -2837,6 +2837,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
use psb_realloc_mod
|
|
|
|
use psb_realloc_mod
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
use omp_lib
|
|
|
|
|
|
|
|
#endif
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
@ -2851,13 +2854,11 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
integer(psb_ipk_), Parameter :: maxtry=8
|
|
|
|
integer(psb_ipk_), Parameter :: maxtry=8
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
character(len=20) :: name='c_cp_csr_from_coo'
|
|
|
|
character(len=20) :: name='c_cp_csr_from_coo'
|
|
|
|
logical :: use_openmp = .false.
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
!$ integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
|
|
|
|
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
|
|
|
|
integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
|
|
|
|
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
|
|
|
|
#endif
|
|
|
|
!$ use_openmp = .true.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info = psb_success_
|
|
|
|
info = psb_success_
|
|
|
|
debug_unit = psb_get_debug_unit()
|
|
|
|
debug_unit = psb_get_debug_unit()
|
|
|
@ -2902,78 +2903,78 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
|
|
|
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
|
|
|
|
|
|
|
!!$ if (use_openmp) then
|
|
|
|
#if defined(OPENMP)
|
|
|
|
!!$ !$ maxthreads = omp_get_max_threads()
|
|
|
|
maxthreads = omp_get_max_threads()
|
|
|
|
!!$ !$ allocate(sum(maxthreads+1))
|
|
|
|
allocate(sum(maxthreads+1))
|
|
|
|
!!$ !$ sum(:) = 0
|
|
|
|
sum(:) = 0
|
|
|
|
!!$ !$ sum(1) = 1
|
|
|
|
sum(1) = 1
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP PARALLEL default(none) &
|
|
|
|
!$OMP PARALLEL default(none) &
|
|
|
|
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
|
|
|
|
!$OMP shared(nza,itemp,a,nthreads,sum,nr) &
|
|
|
|
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP DO schedule(STATIC) &
|
|
|
|
!$OMP DO schedule(STATIC) &
|
|
|
|
!!$ !$OMP private(k,i)
|
|
|
|
!$OMP private(k,i)
|
|
|
|
!!$ do k=1,nza
|
|
|
|
do k=1,nza
|
|
|
|
!!$ i = itemp(k)
|
|
|
|
i = itemp(k)
|
|
|
|
!!$ a%irp(i) = a%irp(i) + 1
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
!!$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$OMP END DO
|
|
|
|
!$OMP END DO
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
!$OMP SINGLE
|
|
|
|
!!$ !$ nthreads = omp_get_num_threads()
|
|
|
|
nthreads = omp_get_num_threads()
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
!$OMP END SINGLE
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ ithread = omp_get_thread_num()
|
|
|
|
ithread = omp_get_thread_num()
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ work = nr/nthreads
|
|
|
|
work = nr/nthreads
|
|
|
|
!!$ !$ if (ithread < MOD(nr,nthreads)) then
|
|
|
|
if (ithread < MOD(nr,nthreads)) then
|
|
|
|
!!$ !$ work = work + 1
|
|
|
|
work = work + 1
|
|
|
|
!!$ !$ first_idx = ithread*work + 1
|
|
|
|
first_idx = ithread*work + 1
|
|
|
|
!!$ !$ else
|
|
|
|
else
|
|
|
|
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ last_idx = first_idx + work - 1
|
|
|
|
last_idx = first_idx + work - 1
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ s = 0
|
|
|
|
s = 0
|
|
|
|
!!$ !$ do i=first_idx,last_idx
|
|
|
|
do i=first_idx,last_idx
|
|
|
|
!!$ !$ s = s + a%irp(i)
|
|
|
|
s = s + a%irp(i)
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ sum(ithread+2) = s
|
|
|
|
sum(ithread+2) = s
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
!$OMP BARRIER
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
!$OMP SINGLE
|
|
|
|
!!$ !$ do i=2,nthreads+1
|
|
|
|
do i=2,nthreads+1
|
|
|
|
!!$ !$ sum(i) = sum(i) + sum(i-1)
|
|
|
|
sum(i) = sum(i) + sum(i-1)
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
!$OMP END SINGLE
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ saved_elem = a%irp(first_idx)
|
|
|
|
saved_elem = a%irp(first_idx)
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$ !$ if (ithread == 0) then
|
|
|
|
if (ithread == 0) then
|
|
|
|
!!$ !$ a%irp(1) = 1
|
|
|
|
a%irp(1) = 1
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
!$OMP BARRIER
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ old_val = a%irp(first_idx+1)
|
|
|
|
old_val = a%irp(first_idx+1)
|
|
|
|
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ do i=first_idx+2,last_idx+1
|
|
|
|
do i=first_idx+2,last_idx+1
|
|
|
|
!!$ !$ nxt_val = a%irp(i)
|
|
|
|
nxt_val = a%irp(i)
|
|
|
|
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
!!$ !$ old_val = nxt_val
|
|
|
|
old_val = nxt_val
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP END PARALLEL
|
|
|
|
!$OMP END PARALLEL
|
|
|
|
!!$ else
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
|
|
do k=1,nza
|
|
|
|
do k=1,nza
|
|
|
|
i = itemp(k)
|
|
|
|
i = itemp(k)
|
|
|
@ -2986,10 +2987,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
ip = ip + ncl
|
|
|
|
ip = ip + ncl
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
!!$ end if
|
|
|
|
#endif
|
|
|
|
call a%set_host()
|
|
|
|
call a%set_host()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
end subroutine psb_c_cp_csr_from_coo
|
|
|
|
end subroutine psb_c_cp_csr_from_coo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -3089,6 +3089,9 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
use psb_error_mod
|
|
|
|
use psb_error_mod
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
use omp_lib
|
|
|
|
|
|
|
|
#endif
|
|
|
|
implicit none
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
@ -3102,12 +3105,12 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
integer(psb_ipk_), Parameter :: maxtry=8
|
|
|
|
integer(psb_ipk_), Parameter :: maxtry=8
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
character(len=20) :: name='mv_from_coo'
|
|
|
|
character(len=20) :: name='mv_from_coo'
|
|
|
|
logical :: use_openmp = .false.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! $ integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
#if defined(OPENMP)
|
|
|
|
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
|
|
|
|
integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
|
|
|
|
integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
|
|
|
|
! $ use_openmp = .true.
|
|
|
|
integer(psb_ipk_) :: nxt_val,old_val,saved_elem
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info = psb_success_
|
|
|
|
info = psb_success_
|
|
|
@ -3135,74 +3138,74 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
|
|
|
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
|
|
|
|
|
|
|
!!$ if (use_openmp) then
|
|
|
|
#if defined(OPENMP)
|
|
|
|
!!$ !$OMP PARALLEL default(none) &
|
|
|
|
!$OMP PARALLEL default(none) &
|
|
|
|
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
|
|
|
|
!$OMP shared(sum,nthreads,nr,a,itemp,nza) &
|
|
|
|
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
!$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP DO schedule(STATIC) &
|
|
|
|
!$OMP DO schedule(STATIC) &
|
|
|
|
!!$ !$OMP private(k,i)
|
|
|
|
!$OMP private(k,i)
|
|
|
|
!!$ do k=1,nza
|
|
|
|
do k=1,nza
|
|
|
|
!!$ i = itemp(k)
|
|
|
|
i = itemp(k)
|
|
|
|
!!$ a%irp(i) = a%irp(i) + 1
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
!!$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$OMP END DO
|
|
|
|
!$OMP END DO
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
!$OMP SINGLE
|
|
|
|
!!$ !$ nthreads = omp_get_num_threads()
|
|
|
|
nthreads = omp_get_num_threads()
|
|
|
|
!!$ !$ allocate(sum(nthreads+1))
|
|
|
|
allocate(sum(nthreads+1))
|
|
|
|
!!$ !$ sum(:) = 0
|
|
|
|
sum(:) = 0
|
|
|
|
!!$ !$ sum(1) = 1
|
|
|
|
sum(1) = 1
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
!$OMP END SINGLE
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ ithread = omp_get_thread_num()
|
|
|
|
ithread = omp_get_thread_num()
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ work = nr/nthreads
|
|
|
|
work = nr/nthreads
|
|
|
|
!!$ !$ if (ithread < MOD(nr,nthreads)) then
|
|
|
|
if (ithread < MOD(nr,nthreads)) then
|
|
|
|
!!$ !$ work = work + 1
|
|
|
|
work = work + 1
|
|
|
|
!!$ !$ first_idx = ithread*work + 1
|
|
|
|
first_idx = ithread*work + 1
|
|
|
|
!!$ !$ else
|
|
|
|
else
|
|
|
|
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ last_idx = first_idx + work - 1
|
|
|
|
last_idx = first_idx + work - 1
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ s = 0
|
|
|
|
s = 0
|
|
|
|
!!$ !$ do i=first_idx,last_idx
|
|
|
|
do i=first_idx,last_idx
|
|
|
|
!!$ !$ s = s + a%irp(i)
|
|
|
|
s = s + a%irp(i)
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ sum(ithread+2) = s
|
|
|
|
sum(ithread+2) = s
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
!$OMP BARRIER
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
!$OMP SINGLE
|
|
|
|
!!$ !$ do i=2,nthreads+1
|
|
|
|
do i=2,nthreads+1
|
|
|
|
!!$ !$ sum(i) = sum(i) + sum(i-1)
|
|
|
|
sum(i) = sum(i) + sum(i-1)
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
!$OMP END SINGLE
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ saved_elem = a%irp(first_idx)
|
|
|
|
saved_elem = a%irp(first_idx)
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$ !$ if (ithread == 0) then
|
|
|
|
if (ithread == 0) then
|
|
|
|
!!$ !$ a%irp(1) = 1
|
|
|
|
a%irp(1) = 1
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
if (work > 0) then
|
|
|
|
!!$ !$ old_val = a%irp(first_idx+1)
|
|
|
|
old_val = a%irp(first_idx+1)
|
|
|
|
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
!!$ !$ end if
|
|
|
|
end if
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$ do i=first_idx+2,last_idx+1
|
|
|
|
do i=first_idx+2,last_idx+1
|
|
|
|
!!$ !$ nxt_val = a%irp(i)
|
|
|
|
nxt_val = a%irp(i)
|
|
|
|
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
!!$ !$ old_val = nxt_val
|
|
|
|
old_val = nxt_val
|
|
|
|
!!$ !$ end do
|
|
|
|
end do
|
|
|
|
!!$
|
|
|
|
|
|
|
|
!!$ !$OMP END PARALLEL
|
|
|
|
!$OMP END PARALLEL
|
|
|
|
!!$ else
|
|
|
|
#else
|
|
|
|
do k=1,nza
|
|
|
|
do k=1,nza
|
|
|
|
i = itemp(k)
|
|
|
|
i = itemp(k)
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
@ -3214,7 +3217,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
ip = ip + ncl
|
|
|
|
ip = ip + ncl
|
|
|
|
end do
|
|
|
|
end do
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
!!$ end if
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
call a%set_host()
|
|
|
|
call a%set_host()
|
|
|
|
|
|
|
|
|