diff --git a/base/internals/psi_bld_tmphalo.f90 b/base/internals/psi_bld_tmphalo.f90 index 78ce69ed..9d3fb7ab 100644 --- a/base/internals/psi_bld_tmphalo.f90 +++ b/base/internals/psi_bld_tmphalo.f90 @@ -104,6 +104,7 @@ subroutine psi_bld_tmphalo(desc,info) call desc%indxmap%l2gip(helem(1:nh),info) if (info == psb_success_) call desc%indxmap%fnd_owner(helem(1:nh),hproc,info) + !write(0,*) 'bld_tmphalo calling set_owner',hproc(:) if (info == psb_success_) call desc%indxmap%set_halo_owner(hproc,info) if (info == psb_success_) call desc%indxmap%xtnd_p_adjcncy(hproc) diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 5f32b2e1..82e1f012 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -655,7 +655,7 @@ contains integer(psb_ipk_) :: me, np character(len=20) :: name,ch_err logical, allocatable :: mask_(:) - logical :: use_openmp = .true. + logical :: use_openmp = .false. #ifdef OPENMP integer(kind = OMP_lock_kind) :: ins_lck #endif diff --git a/base/modules/desc/psb_hash_mod.F90 b/base/modules/desc/psb_hash_mod.F90 index 42911e3f..0c682670 100644 --- a/base/modules/desc/psb_hash_mod.F90 +++ b/base/modules/desc/psb_hash_mod.F90 @@ -388,7 +388,7 @@ contains info = HashOK hsize = hash%hsize hmask = hash%hmask - + val = -1 hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -409,6 +409,7 @@ contains info = HashDuplicate return end if + !$OMP CRITICAL if (hash%table(hk,1) == HashFreeEntry) then if (hash%nk == hash%hsize -1) then ! @@ -420,19 +421,22 @@ contains call psb_hash_realloc(hash,info) if (info /= HashOk) then info = HashOutOfMemory - return + !return else call psb_hash_searchinskey(key,val,nextval,hash,info) - return + !return end if else hash%nk = hash%nk + 1 hash%table(hk,1) = key hash%table(hk,2) = nextval val = nextval - return + !return end if end if + !$OMP END CRITICAL + if (info /= HashOk) return + if (val > 0) return hk = hk - hd if (hk < 0) hk = hk + hsize end do @@ -448,7 +452,7 @@ contains info = HashOK hsize = hash%hsize hmask = hash%hmask - + hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -460,7 +464,7 @@ contains info = HashOutOfMemory return end if - + val = -1 hash%nsrch = hash%nsrch + 1 do hash%nacc = hash%nacc + 1 @@ -469,6 +473,7 @@ contains info = HashDuplicate return end if + !$OMP CRITICAL if (hash%table(hk,1) == HashFreeEntry) then if (hash%nk == hash%hsize -1) then ! @@ -480,19 +485,22 @@ contains call psb_hash_realloc(hash,info) if (info /= HashOk) then info = HashOutOfMemory - return + !return else call psb_hash_searchinskey(key,val,nextval,hash,info) - return + !return end if else hash%nk = hash%nk + 1 hash%table(hk,1) = key hash%table(hk,2) = nextval val = nextval - return + !return end if end if + !$OMP END CRITICAL + if (info /= HashOk) return + if (val > 0) return hk = hk - hd if (hk < 0) hk = hk + hsize end do diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index fa5f2edf..a7869136 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -152,6 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -163,6 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -174,6 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -189,6 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -200,6 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -211,6 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -225,6 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -236,6 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -247,6 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -261,6 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -272,6 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -283,6 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = czero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2862,6 +2874,8 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='c_cp_csr_from_coo' + logical :: use_openmp = .false. + #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j @@ -2983,21 +2997,22 @@ subroutine psb_c_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - + + end subroutine psb_c_cp_csr_from_coo @@ -3113,6 +3128,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' + logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3120,7 +3136,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) integer(psb_ipk_) :: nxt_val,old_val,saved_elem #endif - info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() @@ -3214,6 +3229,7 @@ subroutine psb_c_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else + do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index faf422ae..a2ddad30 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -152,6 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -163,6 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -174,6 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -189,6 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -200,6 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -211,6 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -225,6 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -236,6 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -247,6 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -261,6 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -272,6 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -283,6 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = dzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2862,6 +2874,8 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='d_cp_csr_from_coo' + logical :: use_openmp = .false. + #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j @@ -2983,21 +2997,22 @@ subroutine psb_d_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - + + end subroutine psb_d_cp_csr_from_coo @@ -3113,6 +3128,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' + logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3120,7 +3136,6 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) integer(psb_ipk_) :: nxt_val,old_val,saved_elem #endif - info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() @@ -3214,6 +3229,7 @@ subroutine psb_d_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else + do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index 446e6791..a8c67c10 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -152,6 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -163,6 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -174,6 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -189,6 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -200,6 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -211,6 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -225,6 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -236,6 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -247,6 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -261,6 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -272,6 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -283,6 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = szero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2862,6 +2874,8 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='s_cp_csr_from_coo' + logical :: use_openmp = .false. + #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j @@ -2983,21 +2997,22 @@ subroutine psb_s_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - + + end subroutine psb_s_cp_csr_from_coo @@ -3113,6 +3128,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' + logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3120,7 +3136,6 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) integer(psb_ipk_) :: nxt_val,old_val,saved_elem #endif - info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() @@ -3214,6 +3229,7 @@ subroutine psb_s_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else + do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1 diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 5ec579d5..6344b268 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -152,6 +152,7 @@ contains !$omp parallel do private(i,j, acc) schedule(static) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -163,6 +164,7 @@ contains !$omp parallel do private(i,j, acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -174,6 +176,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -189,6 +192,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -200,6 +204,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -211,6 +216,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -225,6 +231,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -236,6 +243,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -247,6 +255,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -261,6 +270,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -272,6 +282,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -283,6 +294,7 @@ contains !$omp parallel do private(i,j,acc) do i=1,m acc = zzero + !$omp simd do j=irp(i), irp(i+1)-1 acc = acc + val(j) * x(ja(j)) enddo @@ -2862,6 +2874,8 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='z_cp_csr_from_coo' + logical :: use_openmp = .false. + #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j @@ -2983,21 +2997,22 @@ subroutine psb_z_cp_csr_from_coo(a,b,info) !$OMP END PARALLEL #else - - do k=1,nza - i = itemp(k) - a%irp(i) = a%irp(i) + 1 - end do - ip = 1 - do i=1,nr - ncl = a%irp(i) - a%irp(i) = ip - ip = ip + ncl - end do - a%irp(nr+1) = ip + + do k=1,nza + i = itemp(k) + a%irp(i) = a%irp(i) + 1 + end do + ip = 1 + do i=1,nr + ncl = a%irp(i) + a%irp(i) = ip + ip = ip + ncl + end do + a%irp(nr+1) = ip #endif call a%set_host() - + + end subroutine psb_z_cp_csr_from_coo @@ -3113,6 +3128,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) integer(psb_ipk_), Parameter :: maxtry=8 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name='mv_from_coo' + logical :: use_openmp = .false. #if defined(OPENMP) integer(psb_ipk_), allocatable :: sum(:) @@ -3120,7 +3136,6 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) integer(psb_ipk_) :: nxt_val,old_val,saved_elem #endif - info = psb_success_ debug_unit = psb_get_debug_unit() debug_level = psb_get_debug_level() @@ -3214,6 +3229,7 @@ subroutine psb_z_mv_csr_from_coo(a,b,info) !$OMP END PARALLEL #else + do k=1,nza i = itemp(k) a%irp(i) = a%irp(i) + 1