|
|
|
@ -152,6 +152,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j, acc) schedule(static)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -163,6 +164,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j, acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -174,6 +176,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -189,6 +192,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -200,6 +204,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -211,6 +216,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -225,6 +231,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -236,6 +243,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -247,6 +255,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -261,6 +270,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -272,6 +282,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -283,6 +294,7 @@ contains
|
|
|
|
|
!$omp parallel do private(i,j,acc)
|
|
|
|
|
do i=1,m
|
|
|
|
|
acc = czero
|
|
|
|
|
!$omp simd
|
|
|
|
|
do j=irp(i), irp(i+1)-1
|
|
|
|
|
acc = acc + val(j) * x(ja(j))
|
|
|
|
|
enddo
|
|
|
|
@ -2277,7 +2289,155 @@ subroutine psb_c_csr_tril(a,l,info,&
|
|
|
|
|
nb = jmax_
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
block
|
|
|
|
|
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
|
|
|
|
|
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
|
|
|
|
|
call psb_realloc(mb,lrws,info)
|
|
|
|
|
!$omp workshare
|
|
|
|
|
lrws(:) = 0
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
nz = a%get_nzeros()
|
|
|
|
|
call l%allocate(mb,nb,nz)
|
|
|
|
|
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
|
|
|
|
|
if (present(u)) then
|
|
|
|
|
nzlin = l%get_nzeros() ! At this point it should be 0
|
|
|
|
|
call u%allocate(mb,nb,nz)
|
|
|
|
|
nzuin = u%get_nzeros() ! At this point it should be 0
|
|
|
|
|
if (info == 0) call psb_realloc(mb,urws,info)
|
|
|
|
|
!$omp workshare
|
|
|
|
|
urws(:) = 0
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
!write(0,*) 'omp version of COO%TRIL/TRIU'
|
|
|
|
|
lnz = 0
|
|
|
|
|
unz = 0
|
|
|
|
|
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
|
|
|
|
|
loop1: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<=diag_) then
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
lnz = lnz + 1
|
|
|
|
|
else
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
unz = unz + 1
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop1
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
|
|
|
|
|
call psi_exscan(mb,lrws,info)
|
|
|
|
|
call psi_exscan(mb,urws,info)
|
|
|
|
|
!write(0,*) lrws(:), urws(:)
|
|
|
|
|
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
|
|
|
|
|
loop2: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<=diag_) then
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
lpnt = lrws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
l%ia(lpnt) = i
|
|
|
|
|
l%ja(lpnt) = a%ja(k)
|
|
|
|
|
l%val(lpnt) = a%val(k)
|
|
|
|
|
else
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
upnt = urws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
u%ia(upnt) = i
|
|
|
|
|
u%ja(upnt) = a%ja(k)
|
|
|
|
|
u%val(upnt) = a%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop2
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
!write(0,*) 'End of copyout',lnz,unz
|
|
|
|
|
call l%set_nzeros(lnz)
|
|
|
|
|
call l%fix(info)
|
|
|
|
|
call u%set_nzeros(unz)
|
|
|
|
|
call u%fix(info)
|
|
|
|
|
nzout = u%get_nzeros()
|
|
|
|
|
if (rscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if (cscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if ((diag_ >=-1).and.(imin_ == jmin_)) then
|
|
|
|
|
call u%set_triangle(.true.)
|
|
|
|
|
call u%set_lower(.false.)
|
|
|
|
|
end if
|
|
|
|
|
else
|
|
|
|
|
lnz = 0
|
|
|
|
|
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
|
|
|
|
|
loop3: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<=diag_) then
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
lnz = lnz + 1
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop3
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
call psi_exscan(mb,lrws,info)
|
|
|
|
|
!$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
|
|
|
|
|
loop4: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<=diag_) then
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
lpnt = lrws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
l%ia(lpnt) = i
|
|
|
|
|
l%ja(lpnt) = a%ja(k)
|
|
|
|
|
l%val(lpnt) = a%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop4
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
call l%set_nzeros(lnz)
|
|
|
|
|
call l%fix(info)
|
|
|
|
|
end if
|
|
|
|
|
nzout = l%get_nzeros()
|
|
|
|
|
if (rscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if (cscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
if ((diag_ <= 0).and.(imin_ == jmin_)) then
|
|
|
|
|
call l%set_triangle(.true.)
|
|
|
|
|
call l%set_lower(.true.)
|
|
|
|
|
end if
|
|
|
|
|
end block
|
|
|
|
|
#else
|
|
|
|
|
nz = a%get_nzeros()
|
|
|
|
|
call l%allocate(mb,nb,nz)
|
|
|
|
|
|
|
|
|
@ -2347,7 +2507,7 @@ subroutine psb_c_csr_tril(a,l,info,&
|
|
|
|
|
call l%set_triangle(.true.)
|
|
|
|
|
call l%set_lower(.true.)
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
if (info /= psb_success_) goto 9999
|
|
|
|
|
|
|
|
|
|
call psb_erractionrestore(err_act)
|
|
|
|
@ -2431,6 +2591,158 @@ subroutine psb_c_csr_triu(a,u,info,&
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
block
|
|
|
|
|
integer(psb_ipk_), allocatable :: lrws(:),urws(:)
|
|
|
|
|
integer(psb_ipk_) :: lpnt, upnt, lnz, unz
|
|
|
|
|
call psb_realloc(mb,urws,info)
|
|
|
|
|
!$omp workshare
|
|
|
|
|
urws(:) = 0
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
nz = a%get_nzeros()
|
|
|
|
|
call u%allocate(mb,nb,nz)
|
|
|
|
|
!write(0,*) 'Invocation of COO%TRIL', present(u),nz
|
|
|
|
|
if (present(l)) then
|
|
|
|
|
nzuin = u%get_nzeros() ! At this point it should be 0
|
|
|
|
|
call l%allocate(mb,nb,nz)
|
|
|
|
|
nzlin = l%get_nzeros() ! At this point it should be 0
|
|
|
|
|
if (info == 0) call psb_realloc(mb,urws,info)
|
|
|
|
|
!$omp workshare
|
|
|
|
|
lrws(:) = 0
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
!write(0,*) 'omp version of COO%TRIL/TRIU'
|
|
|
|
|
lnz = 0
|
|
|
|
|
unz = 0
|
|
|
|
|
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
|
|
|
|
|
loop1: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<diag_) then
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
lnz = lnz + 1
|
|
|
|
|
else
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
unz = unz + 1
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop1
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
|
|
|
|
|
call psi_exscan(mb,lrws,info)
|
|
|
|
|
call psi_exscan(mb,urws,info)
|
|
|
|
|
!write(0,*) lrws(:), urws(:)
|
|
|
|
|
!$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
|
|
|
|
|
loop2: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)<diag_) then
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
lrws(i-imin_+1) = lrws(i-imin_+1) +1
|
|
|
|
|
lpnt = lrws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
l%ia(lpnt) = i
|
|
|
|
|
l%ja(lpnt) = a%ja(k)
|
|
|
|
|
l%val(lpnt) = a%val(k)
|
|
|
|
|
else
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
upnt = urws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
u%ia(upnt) = i
|
|
|
|
|
u%ja(upnt) = a%ja(k)
|
|
|
|
|
u%val(upnt) = a%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop2
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
!write(0,*) 'End of copyout',lnz,unz
|
|
|
|
|
call l%set_nzeros(lnz)
|
|
|
|
|
call l%fix(info)
|
|
|
|
|
call u%set_nzeros(unz)
|
|
|
|
|
call u%fix(info)
|
|
|
|
|
nzout = l%get_nzeros()
|
|
|
|
|
if (rscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if (cscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if ((diag_ <=-1).and.(imin_ == jmin_)) then
|
|
|
|
|
call l%set_triangle(.true.)
|
|
|
|
|
call l%set_lower(.false.)
|
|
|
|
|
end if
|
|
|
|
|
else
|
|
|
|
|
unz = 0
|
|
|
|
|
!$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
|
|
|
|
|
loop3: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)>=diag_) then
|
|
|
|
|
!$omp atomic update
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
unz = unz + 1
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop3
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
call psi_exscan(mb,urws,info)
|
|
|
|
|
!$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
|
|
|
|
|
loop4: do i=imin_,imax_
|
|
|
|
|
do k = a%irp(i),a%irp(i+1)-1
|
|
|
|
|
j = a%ja(k)
|
|
|
|
|
if ((jmin_<=j).and.(j<=jmax_)) then
|
|
|
|
|
if ((j-i)>=diag_) then
|
|
|
|
|
!$omp atomic capture
|
|
|
|
|
urws(i-imin_+1) = urws(i-imin_+1) +1
|
|
|
|
|
upnt = urws(i-imin_+1)
|
|
|
|
|
!$omp end atomic
|
|
|
|
|
u%ia(upnt) = i
|
|
|
|
|
u%ja(upnt) = a%ja(k)
|
|
|
|
|
u%val(upnt) = a%val(k)
|
|
|
|
|
end if
|
|
|
|
|
end if
|
|
|
|
|
end do
|
|
|
|
|
end do loop4
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
call u%set_nzeros(unz)
|
|
|
|
|
call u%fix(info)
|
|
|
|
|
end if
|
|
|
|
|
nzout = u%get_nzeros()
|
|
|
|
|
if (rscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
if (cscale_) then
|
|
|
|
|
!$omp workshare
|
|
|
|
|
u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
|
|
|
|
|
!$omp end workshare
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
if ((diag_ >= 0).and.(imin_ == jmin_)) then
|
|
|
|
|
call u%set_triangle(.true.)
|
|
|
|
|
call u%set_upper(.true.)
|
|
|
|
|
end if
|
|
|
|
|
end block
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
nz = a%get_nzeros()
|
|
|
|
|
call u%allocate(mb,nb,nz)
|
|
|
|
|
|
|
|
|
@ -2499,7 +2811,7 @@ subroutine psb_c_csr_triu(a,u,info,&
|
|
|
|
|
call u%set_triangle(.true.)
|
|
|
|
|
call u%set_upper(.true.)
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
if (info /= psb_success_) goto 9999
|
|
|
|
|
|
|
|
|
|
call psb_erractionrestore(err_act)
|
|
|
|
@ -2844,6 +3156,9 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
|
use psb_realloc_mod
|
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
use omp_lib
|
|
|
|
|
#endif
|
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
|
@ -2860,12 +3175,6 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
|
character(len=20) :: name='c_cp_csr_from_coo'
|
|
|
|
|
logical :: use_openmp = .false.
|
|
|
|
|
|
|
|
|
|
!$ integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
|
!$ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s,j
|
|
|
|
|
!$ integer(psb_ipk_) :: nxt_val,old_val,saved_elem,maxthreads
|
|
|
|
|
!$ use_openmp = .true.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info = psb_success_
|
|
|
|
|
debug_unit = psb_get_debug_unit()
|
|
|
|
|
debug_level = psb_get_debug_level()
|
|
|
|
@ -2907,96 +3216,37 @@ subroutine psb_c_cp_csr_from_coo(a,b,info)
|
|
|
|
|
|
|
|
|
|
endif
|
|
|
|
|
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
|
|
|
|
|
!!$ if (use_openmp) then
|
|
|
|
|
!!$ !$ maxthreads = omp_get_max_threads()
|
|
|
|
|
!!$ !$ allocate(sum(maxthreads+1))
|
|
|
|
|
!!$ !$ sum(:) = 0
|
|
|
|
|
!!$ !$ sum(1) = 1
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP PARALLEL default(none) &
|
|
|
|
|
!!$ !$OMP shared(nza,itemp,a,nthreads,sum,nr) &
|
|
|
|
|
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP DO schedule(STATIC) &
|
|
|
|
|
!!$ !$OMP private(k,i)
|
|
|
|
|
!!$ do k=1,nza
|
|
|
|
|
!!$ i = itemp(k)
|
|
|
|
|
!!$ a%irp(i) = a%irp(i) + 1
|
|
|
|
|
!!$ end do
|
|
|
|
|
!!$ !$OMP END DO
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
|
!!$ !$ nthreads = omp_get_num_threads()
|
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ ithread = omp_get_thread_num()
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ work = nr/nthreads
|
|
|
|
|
!!$ !$ if (ithread < MOD(nr,nthreads)) then
|
|
|
|
|
!!$ !$ work = work + 1
|
|
|
|
|
!!$ !$ first_idx = ithread*work + 1
|
|
|
|
|
!!$ !$ else
|
|
|
|
|
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ last_idx = first_idx + work - 1
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ s = 0
|
|
|
|
|
!!$ !$ do i=first_idx,last_idx
|
|
|
|
|
!!$ !$ s = s + a%irp(i)
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ sum(ithread+2) = s
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
|
!!$ !$ do i=2,nthreads+1
|
|
|
|
|
!!$ !$ sum(i) = sum(i) + sum(i-1)
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ saved_elem = a%irp(first_idx)
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$ !$ if (ithread == 0) then
|
|
|
|
|
!!$ !$ a%irp(1) = 1
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ old_val = a%irp(first_idx+1)
|
|
|
|
|
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ do i=first_idx+2,last_idx+1
|
|
|
|
|
!!$ !$ nxt_val = a%irp(i)
|
|
|
|
|
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
|
!!$ !$ old_val = nxt_val
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP END PARALLEL
|
|
|
|
|
!!$ else
|
|
|
|
|
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
end do
|
|
|
|
|
ip = 1
|
|
|
|
|
do i=1,nr
|
|
|
|
|
ncl = a%irp(i)
|
|
|
|
|
a%irp(i) = ip
|
|
|
|
|
ip = ip + ncl
|
|
|
|
|
end do
|
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
|
!!$ end if
|
|
|
|
|
call a%set_host()
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
|
|
!$OMP PARALLEL default(shared) reduction(max:info)
|
|
|
|
|
|
|
|
|
|
!$OMP WORKSHARE
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
!$OMP END WORKSHARE
|
|
|
|
|
|
|
|
|
|
!$OMP DO schedule(STATIC) &
|
|
|
|
|
!$OMP private(k,i)
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
!$OMP ATOMIC UPDATE
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
!$OMP END ATOMIC
|
|
|
|
|
end do
|
|
|
|
|
!$OMP END DO
|
|
|
|
|
call psi_exscan(nr+1,a%irp,info,shift=ione)
|
|
|
|
|
!$OMP END PARALLEL
|
|
|
|
|
#else
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
end do
|
|
|
|
|
call psi_exscan(nr+1,a%irp,info,shift=ione)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
call a%set_host()
|
|
|
|
|
|
|
|
|
|
end subroutine psb_c_cp_csr_from_coo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -3096,6 +3346,9 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
|
use psb_error_mod
|
|
|
|
|
use psb_c_base_mat_mod
|
|
|
|
|
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
use omp_lib
|
|
|
|
|
#endif
|
|
|
|
|
implicit none
|
|
|
|
|
|
|
|
|
|
class(psb_c_csr_sparse_mat), intent(inout) :: a
|
|
|
|
@ -3109,13 +3362,6 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
|
integer(psb_ipk_), Parameter :: maxtry=8
|
|
|
|
|
integer(psb_ipk_) :: debug_level, debug_unit
|
|
|
|
|
character(len=20) :: name='mv_from_coo'
|
|
|
|
|
logical :: use_openmp = .false.
|
|
|
|
|
|
|
|
|
|
! $ integer(psb_ipk_), allocatable :: sum(:)
|
|
|
|
|
! $ integer(psb_ipk_) :: first_idx,last_idx,work,ithread,nthreads,s
|
|
|
|
|
! $ integer(psb_ipk_) :: nxt_val,old_val,saved_elem
|
|
|
|
|
! $ use_openmp = .true.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info = psb_success_
|
|
|
|
|
debug_unit = psb_get_debug_unit()
|
|
|
|
@ -3139,90 +3385,34 @@ subroutine psb_c_mv_csr_from_coo(a,b,info)
|
|
|
|
|
call psb_realloc(max(nr+1,nc+1),a%irp,info)
|
|
|
|
|
call b%free()
|
|
|
|
|
|
|
|
|
|
#if defined(OPENMP)
|
|
|
|
|
|
|
|
|
|
!$OMP PARALLEL default(shared) reduction(max:info)
|
|
|
|
|
|
|
|
|
|
!$OMP WORKSHARE
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
!$OMP END WORKSHARE
|
|
|
|
|
|
|
|
|
|
!$OMP DO schedule(STATIC) &
|
|
|
|
|
!$OMP private(k,i)
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
!$OMP ATOMIC UPDATE
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
!$OMP END ATOMIC
|
|
|
|
|
end do
|
|
|
|
|
!$OMP END DO
|
|
|
|
|
call psi_exscan(nr+1,a%irp,info,shift=ione)
|
|
|
|
|
!$OMP END PARALLEL
|
|
|
|
|
#else
|
|
|
|
|
a%irp(:) = 0
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
end do
|
|
|
|
|
call psi_exscan(nr+1,a%irp,info,shift=ione)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
!!$ if (use_openmp) then
|
|
|
|
|
!!$ !$OMP PARALLEL default(none) &
|
|
|
|
|
!!$ !$OMP shared(sum,nthreads,nr,a,itemp,nza) &
|
|
|
|
|
!!$ !$OMP private(ithread,work,first_idx,last_idx,s,saved_elem,nxt_val,old_val)
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP DO schedule(STATIC) &
|
|
|
|
|
!!$ !$OMP private(k,i)
|
|
|
|
|
!!$ do k=1,nza
|
|
|
|
|
!!$ i = itemp(k)
|
|
|
|
|
!!$ a%irp(i) = a%irp(i) + 1
|
|
|
|
|
!!$ end do
|
|
|
|
|
!!$ !$OMP END DO
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
|
!!$ !$ nthreads = omp_get_num_threads()
|
|
|
|
|
!!$ !$ allocate(sum(nthreads+1))
|
|
|
|
|
!!$ !$ sum(:) = 0
|
|
|
|
|
!!$ !$ sum(1) = 1
|
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ ithread = omp_get_thread_num()
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ work = nr/nthreads
|
|
|
|
|
!!$ !$ if (ithread < MOD(nr,nthreads)) then
|
|
|
|
|
!!$ !$ work = work + 1
|
|
|
|
|
!!$ !$ first_idx = ithread*work + 1
|
|
|
|
|
!!$ !$ else
|
|
|
|
|
!!$ !$ first_idx = ithread*work + MOD(nr,nthreads) + 1
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ last_idx = first_idx + work - 1
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ s = 0
|
|
|
|
|
!!$ !$ do i=first_idx,last_idx
|
|
|
|
|
!!$ !$ s = s + a%irp(i)
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ sum(ithread+2) = s
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP BARRIER
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP SINGLE
|
|
|
|
|
!!$ !$ do i=2,nthreads+1
|
|
|
|
|
!!$ !$ sum(i) = sum(i) + sum(i-1)
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$ !$OMP END SINGLE
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ saved_elem = a%irp(first_idx)
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$ !$ if (ithread == 0) then
|
|
|
|
|
!!$ !$ a%irp(1) = 1
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ if (work > 0) then
|
|
|
|
|
!!$ !$ old_val = a%irp(first_idx+1)
|
|
|
|
|
!!$ !$ a%irp(first_idx+1) = saved_elem + sum(ithread+1)
|
|
|
|
|
!!$ !$ end if
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$ do i=first_idx+2,last_idx+1
|
|
|
|
|
!!$ !$ nxt_val = a%irp(i)
|
|
|
|
|
!!$ !$ a%irp(i) = a%irp(i-1) + old_val
|
|
|
|
|
!!$ !$ old_val = nxt_val
|
|
|
|
|
!!$ !$ end do
|
|
|
|
|
!!$
|
|
|
|
|
!!$ !$OMP END PARALLEL
|
|
|
|
|
!!$ else
|
|
|
|
|
do k=1,nza
|
|
|
|
|
i = itemp(k)
|
|
|
|
|
a%irp(i) = a%irp(i) + 1
|
|
|
|
|
end do
|
|
|
|
|
ip = 1
|
|
|
|
|
do i=1,nr
|
|
|
|
|
ncl = a%irp(i)
|
|
|
|
|
a%irp(i) = ip
|
|
|
|
|
ip = ip + ncl
|
|
|
|
|
end do
|
|
|
|
|
a%irp(nr+1) = ip
|
|
|
|
|
!!$ end if
|
|
|
|
|
|
|
|
|
|
call a%set_host()
|
|
|
|
|
|
|
|
|
|
end subroutine psb_c_mv_csr_from_coo
|
|
|
|
@ -3300,9 +3490,28 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
|
|
|
|
|
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
|
|
|
|
|
nr = a%get_nrows()
|
|
|
|
|
nz = a%get_nzeros()
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
|
|
|
|
|
if (.false.) then
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( a%val(1:nz), b%val , info)
|
|
|
|
|
else
|
|
|
|
|
! Despite the implementation in safe_cpy, it seems better this way
|
|
|
|
|
call psb_realloc(nr+1,b%irp,info)
|
|
|
|
|
call psb_realloc(nz,b%ja,info)
|
|
|
|
|
call psb_realloc(nz,b%val,info)
|
|
|
|
|
!$omp parallel do private(i) schedule(static)
|
|
|
|
|
do i=1,nr+1
|
|
|
|
|
b%irp(i)=a%irp(i)
|
|
|
|
|
end do
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
!$omp parallel do private(j) schedule(static)
|
|
|
|
|
do j=1,nz
|
|
|
|
|
b%ja(j) = a%ja(j)
|
|
|
|
|
b%val(j) = a%val(j)
|
|
|
|
|
end do
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
call b%set_host()
|
|
|
|
|
|
|
|
|
|
class default
|
|
|
|
@ -3386,9 +3595,27 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
|
|
|
|
|
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
|
|
|
|
|
nr = b%get_nrows()
|
|
|
|
|
nz = b%get_nzeros()
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
|
|
|
|
|
if (.false.) then
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
|
|
|
|
|
if (info == 0) call psb_safe_cpy( b%val(1:nz) , a%val , info)
|
|
|
|
|
else
|
|
|
|
|
! Despite the implementation in safe_cpy, it seems better this way
|
|
|
|
|
call psb_realloc(nr+1,a%irp,info)
|
|
|
|
|
call psb_realloc(nz,a%ja,info)
|
|
|
|
|
call psb_realloc(nz,a%val,info)
|
|
|
|
|
!$omp parallel do private(i) schedule(static)
|
|
|
|
|
do i=1,nr+1
|
|
|
|
|
a%irp(i)=b%irp(i)
|
|
|
|
|
end do
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
!$omp parallel do private(j) schedule(static)
|
|
|
|
|
do j=1,nz
|
|
|
|
|
a%ja(j)=b%ja(j)
|
|
|
|
|
a%val(j)=b%val(j)
|
|
|
|
|
end do
|
|
|
|
|
!$omp end parallel do
|
|
|
|
|
end if
|
|
|
|
|
call a%set_host()
|
|
|
|
|
|
|
|
|
|
class default
|