diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.F90 similarity index 96% rename from base/serial/impl/psb_c_csr_impl.f90 rename to base/serial/impl/psb_c_csr_impl.F90 index 88510e53..87b0872b 100644 --- a/base/serial/impl/psb_c_csr_impl.f90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -2837,6 +2837,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_c_base_mat_mod use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_c_csr_sparse_mat), intent(inout) :: a @@ -2851,13 +2854,11 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='c_cp_csr_from_coo' - logical :: use_openmp = .false. - - !$ integer(psb_ipk_), allocatable :: sum(:) - !$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j - !$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads - !$ use_openmp = .true. - +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j + integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads +#endif info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2902,94 +2903,93 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$ maxthreads = omp_get_max_threads() -!!$ !$ allocate(sum(maxthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip -!!$ end if +#if defined(OPENMP) + maxthreads = omp_get_max_threads() + allocate(sum(maxthreads+1)) + sum(:) = 0 + sum(1) = 1 + + !$OMP PARALLEL default(none) & + !$OMP shared(nza,itemp,a,nthreads,sum,nr) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip +#endif call a%set_host() - - + end subroutine psb_c_cp_csr_from_coo @@ -3089,6 +3089,9 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_c_base_mat_mod use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_c_csr_sparse_mat), intent(inout) :: a @@ -3102,12 +3105,12 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. - ! $ integer(psb_ipk_), allocatable :: sum(:) - ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s - ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem - ! $ use_openmp = .true. +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s + integer(psb_ipk_) :: nxt_val,old_val,saved_elem +#endif info = psb_success_ @@ -3135,74 +3138,74 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$ allocate(sum(nthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(sum,nthreads,nr,a,itemp,nza) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + allocate(sum(nthreads+1)) + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 @@ -3214,7 +3217,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) ip = ip + ncl end do a%irp(nr+1) = ip -!!$ end if +#endif call a%set_host() diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.F90 similarity index 96% rename from base/serial/impl/psb_d_csr_impl.f90 rename to base/serial/impl/psb_d_csr_impl.F90 index a927b1a9..3aadde20 100644 --- a/base/serial/impl/psb_d_csr_impl.f90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -2837,6 +2837,9 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_d_base_mat_mod use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_d_csr_sparse_mat), intent(inout) :: a @@ -2851,13 +2854,11 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='d_cp_csr_from_coo' - logical :: use_openmp = .false. - - !$ integer(psb_ipk_), allocatable :: sum(:) - !$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j - !$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads - !$ use_openmp = .true. - +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j + integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads +#endif info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2902,94 +2903,93 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$ maxthreads = omp_get_max_threads() -!!$ !$ allocate(sum(maxthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip -!!$ end if +#if defined(OPENMP) + maxthreads = omp_get_max_threads() + allocate(sum(maxthreads+1)) + sum(:) = 0 + sum(1) = 1 + + !$OMP PARALLEL default(none) & + !$OMP shared(nza,itemp,a,nthreads,sum,nr) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip +#endif call a%set_host() - - + end subroutine psb_d_cp_csr_from_coo @@ -3089,6 +3089,9 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_d_base_mat_mod use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_d_csr_sparse_mat), intent(inout) :: a @@ -3102,12 +3105,12 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. - ! $ integer(psb_ipk_), allocatable :: sum(:) - ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s - ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem - ! $ use_openmp = .true. +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s + integer(psb_ipk_) :: nxt_val,old_val,saved_elem +#endif info = psb_success_ @@ -3135,74 +3138,74 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$ allocate(sum(nthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(sum,nthreads,nr,a,itemp,nza) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + allocate(sum(nthreads+1)) + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 @@ -3214,7 +3217,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) ip = ip + ncl end do a%irp(nr+1) = ip -!!$ end if +#endif call a%set_host() diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.F90 similarity index 96% rename from base/serial/impl/psb_s_csr_impl.f90 rename to base/serial/impl/psb_s_csr_impl.F90 index b2d9dd48..a554e13b 100644 --- a/base/serial/impl/psb_s_csr_impl.f90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -2837,6 +2837,9 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_s_base_mat_mod use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_s_csr_sparse_mat), intent(inout) :: a @@ -2851,13 +2854,11 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='s_cp_csr_from_coo' - logical :: use_openmp = .false. - - !$ integer(psb_ipk_), allocatable :: sum(:) - !$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j - !$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads - !$ use_openmp = .true. - +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j + integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads +#endif info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2902,94 +2903,93 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$ maxthreads = omp_get_max_threads() -!!$ !$ allocate(sum(maxthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip -!!$ end if +#if defined(OPENMP) + maxthreads = omp_get_max_threads() + allocate(sum(maxthreads+1)) + sum(:) = 0 + sum(1) = 1 + + !$OMP PARALLEL default(none) & + !$OMP shared(nza,itemp,a,nthreads,sum,nr) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip +#endif call a%set_host() - - + end subroutine psb_s_cp_csr_from_coo @@ -3089,6 +3089,9 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_s_base_mat_mod use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_s_csr_sparse_mat), intent(inout) :: a @@ -3102,12 +3105,12 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. - ! $ integer(psb_ipk_), allocatable :: sum(:) - ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s - ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem - ! $ use_openmp = .true. +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s + integer(psb_ipk_) :: nxt_val,old_val,saved_elem +#endif info = psb_success_ @@ -3135,74 +3138,74 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$ allocate(sum(nthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(sum,nthreads,nr,a,itemp,nza) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + allocate(sum(nthreads+1)) + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 @@ -3214,7 +3217,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) ip = ip + ncl end do a%irp(nr+1) = ip -!!$ end if +#endif call a%set_host() diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.F90 similarity index 96% rename from base/serial/impl/psb_z_csr_impl.f90 rename to base/serial/impl/psb_z_csr_impl.F90 index ba2d322f..b2b0d3d1 100644 --- a/base/serial/impl/psb_z_csr_impl.f90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -2837,6 +2837,9 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) use psb_realloc_mod use psb_z_base_mat_mod use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_z_csr_sparse_mat), intent(inout) :: a @@ -2851,13 +2854,11 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='z_cp_csr_from_coo' - logical :: use_openmp = .false. - - !$ integer(psb_ipk_), allocatable :: sum(:) - !$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j - !$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads - !$ use_openmp = .true. - +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j + integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads +#endif info = psb_success_ debug_unit = psb_get_debug_unit() @@ -2902,94 +2903,93 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$ maxthreads = omp_get_max_threads() -!!$ !$ allocate(sum(maxthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip -!!$ end if +#if defined(OPENMP) + maxthreads = omp_get_max_threads() + allocate(sum(maxthreads+1)) + sum(:) = 0 + sum(1) = 1 + + !$OMP PARALLEL default(none) & + !$OMP shared(nza,itemp,a,nthreads,sum,nr) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + !$OMP BARRIER + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip +#endif call a%set_host() - - + end subroutine psb_z_cp_csr_from_coo @@ -3089,6 +3089,9 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) use psb_error_mod use psb_z_base_mat_mod use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_csr_from_coo +#if defined(OPENMP) + use omp_lib +#endif implicit none class(psb_z_csr_sparse_mat), intent(inout) :: a @@ -3102,12 +3105,12 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' - logical :: use_openmp = .false. - ! $ integer(psb_ipk_), allocatable :: sum(:) - ! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s - ! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem - ! $ use_openmp = .true. +#if defined(OPENMP) + integer(psb_ipk_), allocatable :: sum(:) + integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s + integer(psb_ipk_) :: nxt_val,old_val,saved_elem +#endif info = psb_success_ @@ -3135,74 +3138,74 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) a%irp(:) = 0 -!!$ if (use_openmp) then -!!$ !$OMP PARALLEL default(none) & -!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) & -!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) -!!$ -!!$ !$OMP DO schedule(STATIC) & -!!$ !$OMP private(k,i) -!!$ do k=1,nza -!!$ i = itemp(k) -!!$ a%irp(i) = a%irp(i) + 1 -!!$ end do -!!$ !$OMP END DO -!!$ -!!$ !$OMP SINGLE -!!$ !$ nthreads = omp_get_num_threads() -!!$ !$ allocate(sum(nthreads+1)) -!!$ !$ sum(:) = 0 -!!$ !$ sum(1) = 1 -!!$ !$OMP END SINGLE -!!$ -!!$ !$ ithread = omp_get_thread_num() -!!$ -!!$ !$ work = nr/nthreads -!!$ !$ if (ithread < MOD(nr,nthreads)) then -!!$ !$ work = work + 1 -!!$ !$ first_idx = ithread*work + 1 -!!$ !$ else -!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1 -!!$ !$ end if -!!$ -!!$ !$ last_idx = first_idx + work - 1 -!!$ -!!$ !$ s = 0 -!!$ !$ do i=first_idx,last_idx -!!$ !$ s = s + a%irp(i) -!!$ !$ end do -!!$ !$ if (work > 0) then -!!$ !$ sum(ithread+2) = s -!!$ !$ end if -!!$ -!!$ !$OMP BARRIER -!!$ -!!$ !$OMP SINGLE -!!$ !$ do i=2,nthreads+1 -!!$ !$ sum(i) = sum(i) + sum(i-1) -!!$ !$ end do -!!$ !$OMP END SINGLE -!!$ -!!$ !$ if (work > 0) then -!!$ !$ saved_elem = a%irp(first_idx) -!!$ !$ end if -!!$ !$ if (ithread == 0) then -!!$ !$ a%irp(1) = 1 -!!$ !$ end if -!!$ -!!$ !$ if (work > 0) then -!!$ !$ old_val = a%irp(first_idx+1) -!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1) -!!$ !$ end if -!!$ -!!$ !$ do i=first_idx+2,last_idx+1 -!!$ !$ nxt_val = a%irp(i) -!!$ !$ a%irp(i) = a%irp(i-1) + old_val -!!$ !$ old_val = nxt_val -!!$ !$ end do -!!$ -!!$ !$OMP END PARALLEL -!!$ else +#if defined(OPENMP) + !$OMP PARALLEL default(none) & + !$OMP shared(sum,nthreads,nr,a,itemp,nza) & + !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val) + + !$OMP DO schedule(STATIC) & + !$OMP private(k,i) + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + !$OMP END DO + + !$OMP SINGLE + nthreads = omp_get_num_threads() + allocate(sum(nthreads+1)) + sum(:) = 0 + sum(1) = 1 + !$OMP END SINGLE + + ithread = omp_get_thread_num() + + work = nr/nthreads + if (ithread < MOD(nr,nthreads)) then + work = work + 1 + first_idx = ithread*work + 1 + else + first_idx = ithread*work + MOD(nr,nthreads) + 1 + end if + + last_idx = first_idx + work - 1 + + s = 0 + do i=first_idx,last_idx + s = s + a%irp(i) + end do + if (work > 0) then + sum(ithread+2) = s + end if + + !$OMP BARRIER + + !$OMP SINGLE + do i=2,nthreads+1 + sum(i) = sum(i) + sum(i-1) + end do + !$OMP END SINGLE + + if (work > 0) then + saved_elem = a%irp(first_idx) + end if + if (ithread == 0) then + a%irp(1) = 1 + end if + + if (work > 0) then + old_val = a%irp(first_idx+1) + a%irp(first_idx+1) = saved_elem + sum(ithread+1) + end if + + do i=first_idx+2,last_idx+1 + nxt_val = a%irp(i) + a%irp(i) = a%irp(i-1) + old_val + old_val = nxt_val + end do + + !$OMP END PARALLEL +#else do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 @@ -3214,7 +3217,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) ip = ip + ncl end do a%irp(nr+1) = ip -!!$ end if +#endif call a%set_host() diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index f4b45430..996585dd 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -8,7 +8,7 @@ CSR Storage format for matrix A: CSR COO 0100 MAXIT 05 ITRACE 002 IRST restart for RGMRES and BiCGSTABL -ILU Block Solver ILU,ILUT,INVK,AINVT,AORTH +INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH NONE If ILU : MILU or NONE othewise ignored NONE Scaling if ILUT: NONE, MAXVAL otherwise ignored 0 Level of fill for forward factorization