OpenMP in base_mat and coo

new-context
Salvatore Filippone 4 years ago
parent 77730de80c
commit 0acf10f0d8

@ -250,7 +250,6 @@ subroutine psb_c_base_mv_from_coo(a,b,info)
end subroutine psb_c_base_mv_from_coo end subroutine psb_c_base_mv_from_coo
subroutine psb_c_base_mv_to_fmt(a,b,info) subroutine psb_c_base_mv_to_fmt(a,b,info)
use psb_c_base_mat_mod, psb_protect_name => psb_c_base_mv_to_fmt use psb_c_base_mat_mod, psb_protect_name => psb_c_base_mv_to_fmt
use psb_error_mod use psb_error_mod
@ -698,6 +697,8 @@ subroutine psb_c_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -849,6 +850,8 @@ subroutine psb_c_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -914,8 +917,6 @@ subroutine psb_c_base_triu(a,u,info,&
end subroutine psb_c_base_triu end subroutine psb_c_base_triu
subroutine psb_c_base_clone(a,b,info) subroutine psb_c_base_clone(a,b,info)
use psb_c_base_mat_mod, psb_protect_name => psb_c_base_clone use psb_c_base_mat_mod, psb_protect_name => psb_c_base_clone
use psb_error_mod use psb_error_mod
@ -960,6 +961,7 @@ subroutine psb_c_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = cone tmp%val(nz+i) = cone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i
@ -1506,6 +1508,7 @@ contains
complex(psb_spk_), intent(out) :: y(*) complex(psb_spk_), intent(out) :: y(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
y(i) = d(i)*x(i) y(i) = d(i)*x(i)
end do end do
@ -1519,6 +1522,7 @@ contains
complex(psb_spk_), intent(inout) :: x(*) complex(psb_spk_), intent(inout) :: x(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
x(i) = d(i)*x(i) x(i) = d(i)*x(i)
end do end do
@ -3182,6 +3186,8 @@ subroutine psb_lc_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3334,6 +3340,8 @@ subroutine psb_lc_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3446,6 +3454,7 @@ subroutine psb_lc_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = cone tmp%val(nz+i) = cone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i

@ -114,6 +114,7 @@ subroutine psb_c_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ia(i) j = a%ia(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -126,6 +127,7 @@ subroutine psb_c_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -201,6 +203,7 @@ subroutine psb_c_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -253,12 +256,30 @@ subroutine psb_c_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -346,12 +367,30 @@ function psb_c_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-sone)*beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -728,9 +767,6 @@ subroutine psb_c_coo_print(iout,a,iv,head,ivr,ivc)
end subroutine psb_c_coo_print end subroutine psb_c_coo_print
function psb_c_coo_get_nz_row(idx,a) result(res) function psb_c_coo_get_nz_row(idx,a) result(res)
use psb_const_mod use psb_const_mod
use psb_sort_mod use psb_sort_mod
@ -1670,7 +1706,6 @@ subroutine psb_c_coo_csmv(alpha,a,x,beta,y,info,trans)
end subroutine psb_c_coo_csmv end subroutine psb_c_coo_csmv
subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans) subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -1709,11 +1744,9 @@ subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans)
trans_ = 'N' trans_ = 'N'
end if end if
tra = (psb_toupper(trans_) == 'T') tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C') ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then if (tra.or.ctra) then
m = a%get_ncols() m = a%get_ncols()
n = a%get_nrows() n = a%get_nrows()
@ -1895,7 +1928,15 @@ function psb_c_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
res = szero
!$omp parallel do private(i) reduction(max: res)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_c_coo_maxval end function psb_c_coo_maxval
@ -2275,11 +2316,13 @@ subroutine psb_c_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2553,11 +2596,13 @@ subroutine psb_c_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2768,7 +2813,6 @@ contains
end subroutine psb_c_coo_csgetrow end subroutine psb_c_coo_csgetrow
subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
@ -3021,7 +3065,6 @@ contains
end subroutine psb_c_coo_csput_a end subroutine psb_c_coo_csput_a
subroutine psb_c_cp_coo_to_coo(a,b,info) subroutine psb_c_cp_coo_to_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_coo use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_coo
@ -3045,10 +3088,21 @@ subroutine psb_c_cp_coo_to_coo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -3087,10 +3141,21 @@ subroutine psb_c_cp_coo_from_coo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -3445,8 +3510,6 @@ subroutine psb_c_fix_coo(a,info,idir)
end subroutine psb_c_fix_coo end subroutine psb_c_fix_coo
subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -4174,7 +4237,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
end subroutine psb_c_fix_coo_inner end subroutine psb_c_fix_coo_inner
subroutine psb_c_cp_coo_to_lcoo(a,b,info) subroutine psb_c_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo
@ -4199,10 +4261,21 @@ subroutine psb_c_cp_coo_to_lcoo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -4240,10 +4313,21 @@ subroutine psb_c_cp_coo_from_lcoo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -4442,7 +4526,17 @@ function psb_lc_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
end block
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_lc_coo_maxval end function psb_lc_coo_maxval
@ -4499,7 +4593,17 @@ function psb_lc_coo_csnmi(a) result(res)
i = a%ia(j) i = a%ia(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, m
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:m)) res = maxval(vt(1:m))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
end if end if
@ -4539,7 +4643,17 @@ function psb_lc_coo_csnm1(a) result(res)
i = a%ja(j) i = a%ja(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, n
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:n)) res = maxval(vt(1:n))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
return return
@ -4584,7 +4698,6 @@ subroutine psb_lc_coo_rowsum(d,a)
d(i) = d(i) + a%val(j) d(i) = d(i) + a%val(j)
end do end do
return return
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -4592,7 +4705,6 @@ subroutine psb_lc_coo_rowsum(d,a)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
end subroutine psb_lc_coo_rowsum end subroutine psb_lc_coo_rowsum
subroutine psb_lc_coo_arwsum(d,a) subroutine psb_lc_coo_arwsum(d,a)
@ -4761,6 +4873,7 @@ subroutine psb_lc_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -4813,12 +4926,30 @@ subroutine psb_lc_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -4906,12 +5037,30 @@ function psb_lc_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-1_psb_spk_)*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -5950,7 +6099,7 @@ subroutine psb_lc_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz < 0) then if (nz < 0) then
info = psb_err_iarg_neg_ info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) 3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
goto 9999 goto 9999
end if end if
if (size(ia) < nz) then if (size(ia) < nz) then

@ -250,7 +250,6 @@ subroutine psb_d_base_mv_from_coo(a,b,info)
end subroutine psb_d_base_mv_from_coo end subroutine psb_d_base_mv_from_coo
subroutine psb_d_base_mv_to_fmt(a,b,info) subroutine psb_d_base_mv_to_fmt(a,b,info)
use psb_d_base_mat_mod, psb_protect_name => psb_d_base_mv_to_fmt use psb_d_base_mat_mod, psb_protect_name => psb_d_base_mv_to_fmt
use psb_error_mod use psb_error_mod
@ -698,6 +697,8 @@ subroutine psb_d_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -849,6 +850,8 @@ subroutine psb_d_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -914,8 +917,6 @@ subroutine psb_d_base_triu(a,u,info,&
end subroutine psb_d_base_triu end subroutine psb_d_base_triu
subroutine psb_d_base_clone(a,b,info) subroutine psb_d_base_clone(a,b,info)
use psb_d_base_mat_mod, psb_protect_name => psb_d_base_clone use psb_d_base_mat_mod, psb_protect_name => psb_d_base_clone
use psb_error_mod use psb_error_mod
@ -960,6 +961,7 @@ subroutine psb_d_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = done tmp%val(nz+i) = done
tmp%ia(nz+i) = i tmp%ia(nz+i) = i
@ -1506,6 +1508,7 @@ contains
real(psb_dpk_), intent(out) :: y(*) real(psb_dpk_), intent(out) :: y(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
y(i) = d(i)*x(i) y(i) = d(i)*x(i)
end do end do
@ -1519,6 +1522,7 @@ contains
real(psb_dpk_), intent(inout) :: x(*) real(psb_dpk_), intent(inout) :: x(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
x(i) = d(i)*x(i) x(i) = d(i)*x(i)
end do end do
@ -3182,6 +3186,8 @@ subroutine psb_ld_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3334,6 +3340,8 @@ subroutine psb_ld_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3446,6 +3454,7 @@ subroutine psb_ld_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = done tmp%val(nz+i) = done
tmp%ia(nz+i) = i tmp%ia(nz+i) = i

@ -114,6 +114,7 @@ subroutine psb_d_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ia(i) j = a%ia(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -126,6 +127,7 @@ subroutine psb_d_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -201,6 +203,7 @@ subroutine psb_d_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -253,12 +256,30 @@ subroutine psb_d_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -346,12 +367,30 @@ function psb_d_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-done)*beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -728,9 +767,6 @@ subroutine psb_d_coo_print(iout,a,iv,head,ivr,ivc)
end subroutine psb_d_coo_print end subroutine psb_d_coo_print
function psb_d_coo_get_nz_row(idx,a) result(res) function psb_d_coo_get_nz_row(idx,a) result(res)
use psb_const_mod use psb_const_mod
use psb_sort_mod use psb_sort_mod
@ -1670,7 +1706,6 @@ subroutine psb_d_coo_csmv(alpha,a,x,beta,y,info,trans)
end subroutine psb_d_coo_csmv end subroutine psb_d_coo_csmv
subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans) subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -1709,11 +1744,9 @@ subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans)
trans_ = 'N' trans_ = 'N'
end if end if
tra = (psb_toupper(trans_) == 'T') tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C') ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then if (tra.or.ctra) then
m = a%get_ncols() m = a%get_ncols()
n = a%get_nrows() n = a%get_nrows()
@ -1895,7 +1928,15 @@ function psb_d_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
res = dzero
!$omp parallel do private(i) reduction(max: res)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_d_coo_maxval end function psb_d_coo_maxval
@ -2275,11 +2316,13 @@ subroutine psb_d_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2553,11 +2596,13 @@ subroutine psb_d_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2768,7 +2813,6 @@ contains
end subroutine psb_d_coo_csgetrow end subroutine psb_d_coo_csgetrow
subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
@ -3021,7 +3065,6 @@ contains
end subroutine psb_d_coo_csput_a end subroutine psb_d_coo_csput_a
subroutine psb_d_cp_coo_to_coo(a,b,info) subroutine psb_d_cp_coo_to_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_coo use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_coo
@ -3045,10 +3088,21 @@ subroutine psb_d_cp_coo_to_coo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -3087,10 +3141,21 @@ subroutine psb_d_cp_coo_from_coo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -3445,8 +3510,6 @@ subroutine psb_d_fix_coo(a,info,idir)
end subroutine psb_d_fix_coo end subroutine psb_d_fix_coo
subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -4174,7 +4237,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
end subroutine psb_d_fix_coo_inner end subroutine psb_d_fix_coo_inner
subroutine psb_d_cp_coo_to_lcoo(a,b,info) subroutine psb_d_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo
@ -4199,10 +4261,21 @@ subroutine psb_d_cp_coo_to_lcoo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -4240,10 +4313,21 @@ subroutine psb_d_cp_coo_from_lcoo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -4442,7 +4526,17 @@ function psb_ld_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
end block
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_ld_coo_maxval end function psb_ld_coo_maxval
@ -4499,7 +4593,17 @@ function psb_ld_coo_csnmi(a) result(res)
i = a%ia(j) i = a%ia(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, m
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:m)) res = maxval(vt(1:m))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
end if end if
@ -4539,7 +4643,17 @@ function psb_ld_coo_csnm1(a) result(res)
i = a%ja(j) i = a%ja(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, n
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:n)) res = maxval(vt(1:n))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
return return
@ -4584,7 +4698,6 @@ subroutine psb_ld_coo_rowsum(d,a)
d(i) = d(i) + a%val(j) d(i) = d(i) + a%val(j)
end do end do
return return
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -4592,7 +4705,6 @@ subroutine psb_ld_coo_rowsum(d,a)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
end subroutine psb_ld_coo_rowsum end subroutine psb_ld_coo_rowsum
subroutine psb_ld_coo_arwsum(d,a) subroutine psb_ld_coo_arwsum(d,a)
@ -4761,6 +4873,7 @@ subroutine psb_ld_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -4813,12 +4926,30 @@ subroutine psb_ld_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -4906,12 +5037,30 @@ function psb_ld_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-1_psb_dpk_)*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -5950,7 +6099,7 @@ subroutine psb_ld_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz < 0) then if (nz < 0) then
info = psb_err_iarg_neg_ info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) 3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
goto 9999 goto 9999
end if end if
if (size(ia) < nz) then if (size(ia) < nz) then

@ -250,7 +250,6 @@ subroutine psb_s_base_mv_from_coo(a,b,info)
end subroutine psb_s_base_mv_from_coo end subroutine psb_s_base_mv_from_coo
subroutine psb_s_base_mv_to_fmt(a,b,info) subroutine psb_s_base_mv_to_fmt(a,b,info)
use psb_s_base_mat_mod, psb_protect_name => psb_s_base_mv_to_fmt use psb_s_base_mat_mod, psb_protect_name => psb_s_base_mv_to_fmt
use psb_error_mod use psb_error_mod
@ -698,6 +697,8 @@ subroutine psb_s_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -849,6 +850,8 @@ subroutine psb_s_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -914,8 +917,6 @@ subroutine psb_s_base_triu(a,u,info,&
end subroutine psb_s_base_triu end subroutine psb_s_base_triu
subroutine psb_s_base_clone(a,b,info) subroutine psb_s_base_clone(a,b,info)
use psb_s_base_mat_mod, psb_protect_name => psb_s_base_clone use psb_s_base_mat_mod, psb_protect_name => psb_s_base_clone
use psb_error_mod use psb_error_mod
@ -960,6 +961,7 @@ subroutine psb_s_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = sone tmp%val(nz+i) = sone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i
@ -1506,6 +1508,7 @@ contains
real(psb_spk_), intent(out) :: y(*) real(psb_spk_), intent(out) :: y(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
y(i) = d(i)*x(i) y(i) = d(i)*x(i)
end do end do
@ -1519,6 +1522,7 @@ contains
real(psb_spk_), intent(inout) :: x(*) real(psb_spk_), intent(inout) :: x(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
x(i) = d(i)*x(i) x(i) = d(i)*x(i)
end do end do
@ -3182,6 +3186,8 @@ subroutine psb_ls_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3334,6 +3340,8 @@ subroutine psb_ls_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3446,6 +3454,7 @@ subroutine psb_ls_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = sone tmp%val(nz+i) = sone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i

@ -114,6 +114,7 @@ subroutine psb_s_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ia(i) j = a%ia(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -126,6 +127,7 @@ subroutine psb_s_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -201,6 +203,7 @@ subroutine psb_s_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -253,12 +256,30 @@ subroutine psb_s_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -346,12 +367,30 @@ function psb_s_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-sone)*beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -728,9 +767,6 @@ subroutine psb_s_coo_print(iout,a,iv,head,ivr,ivc)
end subroutine psb_s_coo_print end subroutine psb_s_coo_print
function psb_s_coo_get_nz_row(idx,a) result(res) function psb_s_coo_get_nz_row(idx,a) result(res)
use psb_const_mod use psb_const_mod
use psb_sort_mod use psb_sort_mod
@ -1670,7 +1706,6 @@ subroutine psb_s_coo_csmv(alpha,a,x,beta,y,info,trans)
end subroutine psb_s_coo_csmv end subroutine psb_s_coo_csmv
subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans) subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -1709,11 +1744,9 @@ subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans)
trans_ = 'N' trans_ = 'N'
end if end if
tra = (psb_toupper(trans_) == 'T') tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C') ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then if (tra.or.ctra) then
m = a%get_ncols() m = a%get_ncols()
n = a%get_nrows() n = a%get_nrows()
@ -1895,7 +1928,15 @@ function psb_s_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
res = szero
!$omp parallel do private(i) reduction(max: res)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_s_coo_maxval end function psb_s_coo_maxval
@ -2275,11 +2316,13 @@ subroutine psb_s_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2553,11 +2596,13 @@ subroutine psb_s_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2768,7 +2813,6 @@ contains
end subroutine psb_s_coo_csgetrow end subroutine psb_s_coo_csgetrow
subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
@ -3021,7 +3065,6 @@ contains
end subroutine psb_s_coo_csput_a end subroutine psb_s_coo_csput_a
subroutine psb_s_cp_coo_to_coo(a,b,info) subroutine psb_s_cp_coo_to_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_coo use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_coo
@ -3045,10 +3088,21 @@ subroutine psb_s_cp_coo_to_coo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -3087,10 +3141,21 @@ subroutine psb_s_cp_coo_from_coo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -3445,8 +3510,6 @@ subroutine psb_s_fix_coo(a,info,idir)
end subroutine psb_s_fix_coo end subroutine psb_s_fix_coo
subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -4174,7 +4237,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
end subroutine psb_s_fix_coo_inner end subroutine psb_s_fix_coo_inner
subroutine psb_s_cp_coo_to_lcoo(a,b,info) subroutine psb_s_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo
@ -4199,10 +4261,21 @@ subroutine psb_s_cp_coo_to_lcoo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -4240,10 +4313,21 @@ subroutine psb_s_cp_coo_from_lcoo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -4442,7 +4526,17 @@ function psb_ls_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
end block
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_ls_coo_maxval end function psb_ls_coo_maxval
@ -4499,7 +4593,17 @@ function psb_ls_coo_csnmi(a) result(res)
i = a%ia(j) i = a%ia(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, m
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:m)) res = maxval(vt(1:m))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
end if end if
@ -4539,7 +4643,17 @@ function psb_ls_coo_csnm1(a) result(res)
i = a%ja(j) i = a%ja(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, n
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:n)) res = maxval(vt(1:n))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
return return
@ -4584,7 +4698,6 @@ subroutine psb_ls_coo_rowsum(d,a)
d(i) = d(i) + a%val(j) d(i) = d(i) + a%val(j)
end do end do
return return
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -4592,7 +4705,6 @@ subroutine psb_ls_coo_rowsum(d,a)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
end subroutine psb_ls_coo_rowsum end subroutine psb_ls_coo_rowsum
subroutine psb_ls_coo_arwsum(d,a) subroutine psb_ls_coo_arwsum(d,a)
@ -4761,6 +4873,7 @@ subroutine psb_ls_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -4813,12 +4926,30 @@ subroutine psb_ls_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -4906,12 +5037,30 @@ function psb_ls_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-1_psb_spk_)*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -5950,7 +6099,7 @@ subroutine psb_ls_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz < 0) then if (nz < 0) then
info = psb_err_iarg_neg_ info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) 3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
goto 9999 goto 9999
end if end if
if (size(ia) < nz) then if (size(ia) < nz) then

@ -250,7 +250,6 @@ subroutine psb_z_base_mv_from_coo(a,b,info)
end subroutine psb_z_base_mv_from_coo end subroutine psb_z_base_mv_from_coo
subroutine psb_z_base_mv_to_fmt(a,b,info) subroutine psb_z_base_mv_to_fmt(a,b,info)
use psb_z_base_mat_mod, psb_protect_name => psb_z_base_mv_to_fmt use psb_z_base_mat_mod, psb_protect_name => psb_z_base_mv_to_fmt
use psb_error_mod use psb_error_mod
@ -698,6 +697,8 @@ subroutine psb_z_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -849,6 +850,8 @@ subroutine psb_z_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -914,8 +917,6 @@ subroutine psb_z_base_triu(a,u,info,&
end subroutine psb_z_base_triu end subroutine psb_z_base_triu
subroutine psb_z_base_clone(a,b,info) subroutine psb_z_base_clone(a,b,info)
use psb_z_base_mat_mod, psb_protect_name => psb_z_base_clone use psb_z_base_mat_mod, psb_protect_name => psb_z_base_clone
use psb_error_mod use psb_error_mod
@ -960,6 +961,7 @@ subroutine psb_z_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = zone tmp%val(nz+i) = zone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i
@ -1506,6 +1508,7 @@ contains
complex(psb_dpk_), intent(out) :: y(*) complex(psb_dpk_), intent(out) :: y(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
y(i) = d(i)*x(i) y(i) = d(i)*x(i)
end do end do
@ -1519,6 +1522,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(*) complex(psb_dpk_), intent(inout) :: x(*)
integer(psb_ipk_) :: i integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1,n do i=1,n
x(i) = d(i)*x(i) x(i) = d(i)*x(i)
end do end do
@ -3182,6 +3186,8 @@ subroutine psb_lz_base_tril(a,l,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3334,6 +3340,8 @@ subroutine psb_lz_base_triu(a,u,info,&
call psb_realloc(max(mb,nb),ia,info) call psb_realloc(max(mb,nb),ia,info)
call psb_realloc(max(mb,nb),ja,info) call psb_realloc(max(mb,nb),ja,info)
call psb_realloc(max(mb,nb),val,info) call psb_realloc(max(mb,nb),val,info)
! Implementing this in OpenMP?
! Tricky, to be seen
do i=imin_,imax_, nbk do i=imin_,imax_, nbk
ibk = min(nbk,imax_-i+1) ibk = min(nbk,imax_-i+1)
call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,& call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@ -3446,6 +3454,7 @@ subroutine psb_lz_base_make_nonunit(a)
mnm = min(m,n) mnm = min(m,n)
nz = tmp%get_nzeros() nz = tmp%get_nzeros()
call tmp%reallocate(nz+mnm) call tmp%reallocate(nz+mnm)
!$omp parallel do private(i) shared(nz)
do i=1, mnm do i=1, mnm
tmp%val(nz+i) = zone tmp%val(nz+i) = zone
tmp%ia(nz+i) = i tmp%ia(nz+i) = i

@ -114,6 +114,7 @@ subroutine psb_z_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ia(i) j = a%ia(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -126,6 +127,7 @@ subroutine psb_z_coo_scal(d,a,info,side)
goto 9999 goto 9999
end if end if
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
j = a%ja(i) j = a%ja(i)
a%val(i) = a%val(i) * d(j) a%val(i) = a%val(i) * d(j)
@ -201,6 +203,7 @@ subroutine psb_z_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -253,12 +256,30 @@ subroutine psb_z_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -346,12 +367,30 @@ function psb_z_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined (OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-done)*beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -728,9 +767,6 @@ subroutine psb_z_coo_print(iout,a,iv,head,ivr,ivc)
end subroutine psb_z_coo_print end subroutine psb_z_coo_print
function psb_z_coo_get_nz_row(idx,a) result(res) function psb_z_coo_get_nz_row(idx,a) result(res)
use psb_const_mod use psb_const_mod
use psb_sort_mod use psb_sort_mod
@ -1670,7 +1706,6 @@ subroutine psb_z_coo_csmv(alpha,a,x,beta,y,info,trans)
end subroutine psb_z_coo_csmv end subroutine psb_z_coo_csmv
subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans) subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -1709,11 +1744,9 @@ subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans)
trans_ = 'N' trans_ = 'N'
end if end if
tra = (psb_toupper(trans_) == 'T') tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C') ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then if (tra.or.ctra) then
m = a%get_ncols() m = a%get_ncols()
n = a%get_nrows() n = a%get_nrows()
@ -1895,7 +1928,15 @@ function psb_z_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
res = dzero
!$omp parallel do private(i) reduction(max: res)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_z_coo_maxval end function psb_z_coo_maxval
@ -2275,11 +2316,13 @@ subroutine psb_z_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2553,11 +2596,13 @@ subroutine psb_z_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
& iren) & iren)
if (rscale_) then if (rscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ia(i) = ia(i) - imin + 1 ia(i) = ia(i) - imin + 1
end do end do
end if end if
if (cscale_) then if (cscale_) then
!$omp parallel do private(i)
do i=nzin_+1, nzin_+nz do i=nzin_+1, nzin_+nz
ja(i) = ja(i) - jmin_ + 1 ja(i) = ja(i) - jmin_ + 1
end do end do
@ -2768,7 +2813,6 @@ contains
end subroutine psb_z_coo_csgetrow end subroutine psb_z_coo_csgetrow
subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
use psb_error_mod use psb_error_mod
use psb_realloc_mod use psb_realloc_mod
@ -3021,7 +3065,6 @@ contains
end subroutine psb_z_coo_csput_a end subroutine psb_z_coo_csput_a
subroutine psb_z_cp_coo_to_coo(a,b,info) subroutine psb_z_cp_coo_to_coo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_coo use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_coo
@ -3045,10 +3088,21 @@ subroutine psb_z_cp_coo_to_coo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -3087,10 +3141,21 @@ subroutine psb_z_cp_coo_from_coo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -3445,8 +3510,6 @@ subroutine psb_z_fix_coo(a,info,idir)
end subroutine psb_z_fix_coo end subroutine psb_z_fix_coo
subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
@ -4174,7 +4237,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
end subroutine psb_z_fix_coo_inner end subroutine psb_z_fix_coo_inner
subroutine psb_z_cp_coo_to_lcoo(a,b,info) subroutine psb_z_cp_coo_to_lcoo(a,b,info)
use psb_error_mod use psb_error_mod
use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo
@ -4199,10 +4261,21 @@ subroutine psb_z_cp_coo_to_lcoo(a,b,info)
call b%set_nzeros(nz) call b%set_nzeros(nz)
call b%reallocate(nz) call b%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
b%ia(i) = a%ia(i)
b%ja(i) = a%ja(i)
b%val(i) = a%val(i)
end do
end block
#else
b%ia(1:nz) = a%ia(1:nz) b%ia(1:nz) = a%ia(1:nz)
b%ja(1:nz) = a%ja(1:nz) b%ja(1:nz) = a%ja(1:nz)
b%val(1:nz) = a%val(1:nz) b%val(1:nz) = a%val(1:nz)
#endif
call b%set_host() call b%set_host()
if (.not.b%is_by_rows()) call b%fix(info) if (.not.b%is_by_rows()) call b%fix(info)
@ -4240,10 +4313,21 @@ subroutine psb_z_cp_coo_from_lcoo(a,b,info)
call a%set_nzeros(nz) call a%set_nzeros(nz)
call a%reallocate(nz) call a%reallocate(nz)
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nz
a%ia(i) = b%ia(i)
a%ja(i) = b%ja(i)
a%val(i) = b%val(i)
end do
end block
#else
a%ia(1:nz) = b%ia(1:nz) a%ia(1:nz) = b%ia(1:nz)
a%ja(1:nz) = b%ja(1:nz) a%ja(1:nz) = b%ja(1:nz)
a%val(1:nz) = b%val(1:nz) a%val(1:nz) = b%val(1:nz)
#endif
call a%set_host() call a%set_host()
if (.not.a%is_by_rows()) call a%fix(info) if (.not.a%is_by_rows()) call a%fix(info)
@ -4442,7 +4526,17 @@ function psb_lz_coo_maxval(a) result(res)
nnz = a%get_nzeros() nnz = a%get_nzeros()
if (allocated(a%val)) then if (allocated(a%val)) then
nnz = min(nnz,size(a%val)) nnz = min(nnz,size(a%val))
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nnz
res = max(res,abs(a%val(i)))
end do
end block
#else
res = maxval(abs(a%val(1:nnz))) res = maxval(abs(a%val(1:nnz)))
#endif
end if end if
end function psb_lz_coo_maxval end function psb_lz_coo_maxval
@ -4499,7 +4593,17 @@ function psb_lz_coo_csnmi(a) result(res)
i = a%ia(j) i = a%ia(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, m
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:m)) res = maxval(vt(1:m))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
end if end if
@ -4539,7 +4643,17 @@ function psb_lz_coo_csnm1(a) result(res)
i = a%ja(j) i = a%ja(j)
vt(i) = vt(i) + abs(a%val(j)) vt(i) = vt(i) + abs(a%val(j))
end do end do
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, n
res = max(res,abs(vt(i)))
end do
end block
#else
res = maxval(vt(1:n)) res = maxval(vt(1:n))
#endif
deallocate(vt,stat=info) deallocate(vt,stat=info)
return return
@ -4584,7 +4698,6 @@ subroutine psb_lz_coo_rowsum(d,a)
d(i) = d(i) + a%val(j) d(i) = d(i) + a%val(j)
end do end do
return return
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -4592,7 +4705,6 @@ subroutine psb_lz_coo_rowsum(d,a)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
end subroutine psb_lz_coo_rowsum end subroutine psb_lz_coo_rowsum
subroutine psb_lz_coo_arwsum(d,a) subroutine psb_lz_coo_arwsum(d,a)
@ -4761,6 +4873,7 @@ subroutine psb_lz_coo_scalplusidentity(d,a,info)
end if end if
mnm = min(a%get_nrows(),a%get_ncols()) mnm = min(a%get_nrows(),a%get_ncols())
!$omp parallel do private(i,j)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
j=a%ia(i) j=a%ia(i)
@ -4813,12 +4926,30 @@ subroutine psb_lz_coo_spaxpby(alpha,a,beta,b,info)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = alpha*a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = beta*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = alpha*a%val(1:nza) tcoo%val(1:nza) = alpha*a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
! Move to correct output format ! Move to correct output format
@ -4906,12 +5037,30 @@ function psb_lz_coo_cmpmat(a,b,tol,info) result(res)
! Allocate (temporary) space for the solution ! Allocate (temporary) space for the solution
call tcoo%allocate(M,N,(nza+nzb)) call tcoo%allocate(M,N,(nza+nzb))
! Compute the sum ! Compute the sum
#if defined(OPENMP)
block
integer(psb_ipk_) :: i
!$omp parallel do private(i)
do i=1, nza
tcoo%ia(i) = a%ia(i)
tcoo%ja(i) = a%ja(i)
tcoo%val(i) = a%val(i)
end do
!$omp parallel do private(i)
do i=1, nzb
tcoo%ia(nza+i) = bcoo%ia(i)
tcoo%ja(nza+i) = bcoo%ja(i)
tcoo%val(nza+i) = (-1_psb_dpk_)*bcoo%val(i)
end do
end block
#else
tcoo%ia(1:nza) = a%ia(1:nza) tcoo%ia(1:nza) = a%ia(1:nza)
tcoo%ja(1:nza) = a%ja(1:nza) tcoo%ja(1:nza) = a%ja(1:nza)
tcoo%val(1:nza) = a%val(1:nza) tcoo%val(1:nza) = a%val(1:nza)
tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb) tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb) tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb) tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb)
#endif
! Fix the indexes ! Fix the indexes
call tcoo%fix(info) call tcoo%fix(info)
if (info /= psb_success_) then if (info /= psb_success_) then
@ -5950,7 +6099,7 @@ subroutine psb_lz_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
if (nz < 0) then if (nz < 0) then
info = psb_err_iarg_neg_ info = psb_err_iarg_neg_
call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) 3 call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
goto 9999 goto 9999
end if end if
if (size(ia) < nz) then if (size(ia) < nz) then

Loading…
Cancel
Save