Rework cp_{from|to}_fmt for better OpenMP performance

master
sfilippone 2 years ago
parent d378266f33
commit 2f403e0df7

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -768,7 +768,7 @@ Contains
integer(psb_ipk_) :: info
! ...Local Variables
integer(psb_ipk_) :: isz,err_act,lb
integer(psb_ipk_) :: isz,err_act,lb, i
character(len=30) :: name, char_err
logical, parameter :: debug=.false.
@ -790,9 +790,11 @@ Contains
call psb_errpush(info,name,a_err=char_err)
goto 9999
else
!$omp workshare
vout(:) = vin(:)
!$omp end workshare
!$omp parallel do private(i)
do i=lb,lb+isz-1
vout(i) = vin(i)
end do
!$omp end parallel do
endif
endif

@ -2329,9 +2329,28 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2443,9 +2462,27 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -3190,9 +3190,28 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3276,9 +3295,27 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -2329,9 +2329,28 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2443,9 +2462,27 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -3190,9 +3190,28 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3276,9 +3295,27 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info)
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -2329,9 +2329,28 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2443,9 +2462,27 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -3190,9 +3190,28 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3276,9 +3295,27 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info)
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -2329,9 +2329,28 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,b%icp,info)
call psb_realloc(nz,b%ia,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
b%icp(i)=a%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ia(j) = a%ia(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -2443,9 +2462,27 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz), a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nc+1,a%icp,info)
call psb_realloc(nz,a%ia,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nc+1
a%icp(i)=b%icp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ia(j)=b%ia(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

@ -3190,9 +3190,28 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,b%irp,info)
call psb_realloc(nz,b%ja,info)
call psb_realloc(nz,b%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
b%irp(i)=a%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
b%ja(j) = a%ja(j)
b%val(j) = a%val(j)
end do
!$omp end parallel do
end if
call b%set_host()
class default
@ -3276,9 +3295,27 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info)
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
else
! psb_safe_cpy could do these copies (see the disabled branch above), but an
! explicit psb_realloc followed by OpenMP copy loops seems to perform better.
call psb_realloc(nr+1,a%irp,info)
call psb_realloc(nz,a%ja,info)
call psb_realloc(nz,a%val,info)
!$omp parallel do private(i) schedule(static)
do i=1,nr+1
a%irp(i)=b%irp(i)
end do
!$omp end parallel do
!$omp parallel do private(j) schedule(static)
do j=1,nz
a%ja(j)=b%ja(j)
a%val(j)=b%val(j)
end do
!$omp end parallel do
end if
call a%set_host()
class default

Loading…
Cancel
Save