psbn_d_base_mat_mod.f03
 psbn_d_coo_impl.f03
 psbn_d_csr_impl.f03
 psbn_d_csr_mat_mod.f03
performance fixes
psblas3-type-indexed
Salvatore Filippone 16 years ago
parent 4ecc1b632d
commit d0c4c5c77c

@ -1882,6 +1882,7 @@ contains
call a%set_nzeros(0) call a%set_nzeros(0)
call a%set_bld() call a%set_bld()
call a%set_triangle(.false.) call a%set_triangle(.false.)
call a%set_unit(.false.)
call a%set_dupl(psbn_dupl_def_) call a%set_dupl(psbn_dupl_def_)
end if end if
if (info /= 0) goto 9999 if (info /= 0) goto 9999

@ -1,6 +1,8 @@
subroutine d_coo_cssm_impl(alpha,a,x,beta,y,info,trans) subroutine d_coo_cssm_impl(alpha,a,x,beta,y,info,trans)
use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_string_mod
use psbn_d_base_mat_mod, psb_protect_name => d_coo_cssm_impl use psbn_d_base_mat_mod, psb_protect_name => d_coo_cssm_impl
implicit none implicit none
class(psbn_d_coo_sparse_mat), intent(in) :: a class(psbn_d_coo_sparse_mat), intent(in) :: a
@ -39,7 +41,7 @@ subroutine d_coo_cssm_impl(alpha,a,x,beta,y,info,trans)
else else
trans_ = 'N' trans_ = 'N'
end if end if
tra = ((trans_=='T').or.(trans_=='t')) tra = (psb_toupper(trans_)=='T').or.(psb_toupper(trans_)=='C'))
m = a%get_nrows() m = a%get_nrows()
nc = min(size(x,2) , size(y,2)) nc = min(size(x,2) , size(y,2))
@ -270,6 +272,7 @@ end subroutine d_coo_cssm_impl
subroutine d_coo_cssv_impl(alpha,a,x,beta,y,info,trans) subroutine d_coo_cssv_impl(alpha,a,x,beta,y,info,trans)
use psb_const_mod use psb_const_mod
use psb_error_mod use psb_error_mod
use psb_string_mod
use psbn_d_base_mat_mod, psb_protect_name => d_coo_cssv_impl use psbn_d_base_mat_mod, psb_protect_name => d_coo_cssv_impl
implicit none implicit none
class(psbn_d_coo_sparse_mat), intent(in) :: a class(psbn_d_coo_sparse_mat), intent(in) :: a
@ -301,7 +304,7 @@ subroutine d_coo_cssv_impl(alpha,a,x,beta,y,info,trans)
goto 9999 goto 9999
endif endif
tra = ((trans_=='T').or.(trans_=='t')) tra = (psb_toupper(trans_)=='T').or.(psb_toupper(trans_)=='C'))
m = a%get_nrows() m = a%get_nrows()
if (.not. (a%is_triangle())) then if (.not. (a%is_triangle())) then
@ -589,18 +592,7 @@ subroutine d_coo_csmv_impl(alpha,a,x,beta,y,info,trans)
endif endif
return return
else else
if (.not.a%is_unit()) then if (a%is_triangle().and.a%is_unit()) then
if (beta == dzero) then
do i = 1, m
y(i) = dzero
enddo
else
do i = 1, m
y(i) = beta*y(i)
end do
endif
else if (a%is_unit()) then
if (beta == dzero) then if (beta == dzero) then
do i = 1, min(m,n) do i = 1, min(m,n)
y(i) = alpha*x(i) y(i) = alpha*x(i)
@ -616,6 +608,16 @@ subroutine d_coo_csmv_impl(alpha,a,x,beta,y,info,trans)
y(i) = beta*y(i) y(i) = beta*y(i)
enddo enddo
endif endif
else
if (beta == dzero) then
do i = 1, m
y(i) = dzero
enddo
else
do i = 1, m
y(i) = beta*y(i)
end do
endif
endif endif
@ -643,6 +645,7 @@ subroutine d_coo_csmv_impl(alpha,a,x,beta,y,info,trans)
end if end if
else if (tra) then else if (tra) then
if (alpha.eq.done) then if (alpha.eq.done) then
i = 1 i = 1
do i=1,nnz do i=1,nnz
@ -755,18 +758,7 @@ subroutine d_coo_csmm_impl(alpha,a,x,beta,y,info,trans)
endif endif
return return
else else
if (.not.a%is_unit()) then if (a%is_triangle().and.a%is_unit()) then
if (beta == dzero) then
do i = 1, m
y(i,1:nc) = dzero
enddo
else
do i = 1, m
y(i,1:nc) = beta*y(i,1:nc)
end do
endif
else if (a%is_unit()) then
if (beta == dzero) then if (beta == dzero) then
do i = 1, min(m,n) do i = 1, min(m,n)
y(i,1:nc) = alpha*x(i,1:nc) y(i,1:nc) = alpha*x(i,1:nc)
@ -782,6 +774,16 @@ subroutine d_coo_csmm_impl(alpha,a,x,beta,y,info,trans)
y(i,1:nc) = beta*y(i,1:nc) y(i,1:nc) = beta*y(i,1:nc)
enddo enddo
endif endif
else
if (beta == dzero) then
do i = 1, m
y(i,1:nc) = dzero
enddo
else
do i = 1, m
y(i,1:nc) = beta*y(i,1:nc)
end do
endif
endif endif
@ -1581,6 +1583,7 @@ subroutine d_cp_coo_to_coo_impl(a,b,info)
call b%set_state(a%get_state()) call b%set_state(a%get_state())
call b%set_triangle(a%is_triangle()) call b%set_triangle(a%is_triangle())
call b%set_upper(a%is_upper()) call b%set_upper(a%is_upper())
call b%set_unit(a%is_unit())
call b%reallocate(a%get_nzeros()) call b%reallocate(a%get_nzeros())
@ -1631,6 +1634,7 @@ subroutine d_cp_coo_from_coo_impl(a,b,info)
call a%set_state(b%get_state()) call a%set_state(b%get_state())
call a%set_triangle(b%is_triangle()) call a%set_triangle(b%is_triangle())
call a%set_upper(b%is_upper()) call a%set_upper(b%is_upper())
call a%set_unit(b%is_unit())
call a%reallocate(b%get_nzeros()) call a%reallocate(b%get_nzeros())
@ -2054,6 +2058,7 @@ subroutine d_mv_coo_to_coo_impl(a,b,info)
call b%set_state(a%get_state()) call b%set_state(a%get_state())
call b%set_triangle(a%is_triangle()) call b%set_triangle(a%is_triangle())
call b%set_upper(a%is_upper()) call b%set_upper(a%is_upper())
call b%set_unit(a%is_unit())
call b%reallocate(a%get_nzeros()) call b%reallocate(a%get_nzeros())
@ -2105,6 +2110,7 @@ subroutine d_mv_coo_from_coo_impl(a,b,info)
call a%set_state(b%get_state()) call a%set_state(b%get_state())
call a%set_triangle(b%is_triangle()) call a%set_triangle(b%is_triangle())
call a%set_upper(b%is_upper()) call a%set_upper(b%is_upper())
call a%set_unit(b%is_unit())
call a%reallocate(b%get_nzeros()) call a%reallocate(b%get_nzeros())

@ -14,6 +14,7 @@
subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans) subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
use psb_error_mod use psb_error_mod
use psb_string_mod
use psbn_d_csr_mat_mod, psb_protect_name => d_csr_csmv_impl use psbn_d_csr_mat_mod, psb_protect_name => d_csr_csmv_impl
implicit none implicit none
class(psbn_d_csr_sparse_mat), intent(in) :: a class(psbn_d_csr_sparse_mat), intent(in) :: a
@ -46,7 +47,7 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
endif endif
tra = ((trans_=='T').or.(trans_=='t')) tra = (psb_toupper(trans_)=='T').or.(psb_toupper(trans_)=='C')
if (tra) then if (tra) then
m = a%get_ncols() m = a%get_ncols()
@ -56,6 +57,33 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
m = a%get_nrows() m = a%get_nrows()
end if end if
call d_csr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%is_triangle(),a%is_unit(),&
& x,beta,y,tra)
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error()
return
end if
return
contains
subroutine d_csr_csmv_inner(m,n,alpha,irp,ja,val,is_triangle,is_unit,&
& x,beta,y,tra)
integer, intent(in) :: m,n,irp(*),ja(*)
real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(*)
real(psb_dpk_), intent(inout) :: y(*)
logical, intent(in) :: is_triangle,is_unit,tra
integer :: i,j,k, ir, jc
real(psb_dpk_) :: acc
if (alpha == dzero) then if (alpha == dzero) then
if (beta == dzero) then if (beta == dzero) then
@ -70,6 +98,7 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
return return
end if end if
if (.not.tra) then if (.not.tra) then
if (beta == dzero) then if (beta == dzero) then
@ -77,8 +106,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = acc y(i) = acc
end do end do
@ -87,8 +116,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = -acc y(i) = -acc
end do end do
@ -97,8 +126,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = alpha*acc y(i) = alpha*acc
end do end do
@ -111,8 +140,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = y(i) + acc y(i) = y(i) + acc
end do end do
@ -121,8 +150,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = y(i) -acc y(i) = y(i) -acc
end do end do
@ -131,8 +160,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = y(i) + alpha*acc y(i) = y(i) + alpha*acc
end do end do
@ -144,8 +173,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = -y(i) + acc y(i) = -y(i) + acc
end do end do
@ -154,8 +183,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = -y(i) -acc y(i) = -y(i) -acc
end do end do
@ -164,8 +193,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = -y(i) + alpha*acc y(i) = -y(i) + alpha*acc
end do end do
@ -177,8 +206,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = beta*y(i) + acc y(i) = beta*y(i) + acc
end do end do
@ -187,8 +216,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = beta*y(i) - acc y(i) = beta*y(i) - acc
end do end do
@ -197,8 +226,8 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
do i=1,m do i=1,m
acc = dzero acc = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j)) acc = acc + val(j) * x(ja(j))
enddo enddo
y(i) = beta*y(i) + alpha*acc y(i) = beta*y(i) + alpha*acc
end do end do
@ -228,27 +257,27 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
if (alpha.eq.done) then if (alpha.eq.done) then
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir) = y(ir) + a%val(j)*x(i) y(ir) = y(ir) + val(j)*x(i)
end do end do
enddo enddo
else if (alpha.eq.-done) then else if (alpha.eq.-done) then
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir) = y(ir) - a%val(j)*x(i) y(ir) = y(ir) - val(j)*x(i)
end do end do
enddo enddo
else else
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir) = y(ir) + alpha*a%val(j)*x(i) y(ir) = y(ir) + alpha*val(j)*x(i)
end do end do
enddo enddo
@ -256,27 +285,21 @@ subroutine d_csr_csmv_impl(alpha,a,x,beta,y,info,trans)
endif endif
if (a%is_unit()) then if (is_triangle.and.is_unit) then
do i=1, min(m,n) do i=1, min(m,n)
y(i) = y(i) + alpha*x(i) y(i) = y(i) + alpha*x(i)
end do end do
end if end if
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then end subroutine d_csr_csmv_inner
call psb_error()
return
end if
return
end subroutine d_csr_csmv_impl end subroutine d_csr_csmv_impl
subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans) subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
use psb_error_mod use psb_error_mod
use psb_string_mod
use psbn_d_csr_mat_mod, psb_protect_name => d_csr_csmm_impl use psbn_d_csr_mat_mod, psb_protect_name => d_csr_csmm_impl
implicit none implicit none
class(psbn_d_csr_sparse_mat), intent(in) :: a class(psbn_d_csr_sparse_mat), intent(in) :: a
@ -302,12 +325,12 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
trans_ = 'N' trans_ = 'N'
end if end if
tra = ((trans_=='T').or.(trans_=='t'))
if (.not.a%is_asb()) then if (.not.a%is_asb()) then
info = 1121 info = 1121
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
endif endif
tra = (psb_toupper(trans_)=='T').or.(psb_toupper(trans_)=='C')
if (tra) then if (tra) then
m = a%get_ncols() m = a%get_ncols()
@ -326,14 +349,42 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
goto 9999 goto 9999
end if end if
call d_csr_csmm_inner(m,n,nc,alpha,a%irp,a%ja,a%val, &
& a%is_triangle(),a%is_unit(),x,size(x,1), &
& beta,y,size(y,1),tra,acc)
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error()
return
end if
return
contains
subroutine d_csr_csmm_inner(m,n,nc,alpha,irp,ja,val,&
& is_triangle,is_unit,x,ldx,beta,y,ldy,tra,acc)
integer, intent(in) :: m,n,ldx,ldy,nc,irp(*),ja(*)
real(psb_dpk_), intent(in) :: alpha, beta, x(ldx,*),val(*)
real(psb_dpk_), intent(inout) :: y(ldy,*)
logical, intent(in) :: is_triangle,is_unit,tra
real(psb_dpk_), intent(inout) :: acc(*)
integer :: i,j,k, ir, jc
if (alpha == dzero) then if (alpha == dzero) then
if (beta == dzero) then if (beta == dzero) then
do i = 1, m do i = 1, m
y(i,:) = dzero y(i,1:nc) = dzero
enddo enddo
else else
do i = 1, m do i = 1, m
y(i,:) = beta*y(i,:) y(i,1:nc) = beta*y(i,1:nc)
end do end do
endif endif
return return
@ -345,31 +396,31 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = acc y(i,1:nc) = acc(1:nc)
end do end do
else if (alpha == -done) then else if (alpha == -done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = -acc y(i,1:nc) = -acc(1:nc)
end do end do
else else
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = alpha*acc y(i,1:nc) = alpha*acc(1:nc)
end do end do
end if end if
@ -379,31 +430,31 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = y(i,:) + acc y(i,1:nc) = y(i,1:nc) + acc(1:nc)
end do end do
else if (alpha == -done) then else if (alpha == -done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = y(i,:) -acc y(i,1:nc) = y(i,1:nc) -acc(1:nc)
end do end do
else else
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = y(i,:) + alpha*acc y(i,1:nc) = y(i,1:nc) + alpha*acc(1:nc)
end do end do
end if end if
@ -412,31 +463,31 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = -y(i,:) + acc y(i,1:nc) = -y(i,1:nc) + acc(1:nc)
end do end do
else if (alpha == -done) then else if (alpha == -done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = -y(i,:) -acc y(i,1:nc) = -y(i,1:nc) -acc(1:nc)
end do end do
else else
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = -y(i,:) + alpha*acc y(i,1:nc) = -y(i,1:nc) + alpha*acc(1:nc)
end do end do
end if end if
@ -445,31 +496,31 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
if (alpha == done) then if (alpha == done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = beta*y(i,:) + acc y(i,1:nc) = beta*y(i,1:nc) + acc(1:nc)
end do end do
else if (alpha == -done) then else if (alpha == -done) then
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = beta*y(i,:) - acc y(i,1:nc) = beta*y(i,1:nc) - acc(1:nc)
end do end do
else else
do i=1,m do i=1,m
acc = dzero acc(1:nc) = dzero
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
acc = acc + a%val(j) * x(a%ja(j),:) acc(1:nc) = acc(1:nc) + val(j) * x(ja(j),1:nc)
enddo enddo
y(i,:) = beta*y(i,:) + alpha*acc y(i,1:nc) = beta*y(i,1:nc) + alpha*acc(1:nc)
end do end do
end if end if
@ -480,44 +531,44 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
if (beta == dzero) then if (beta == dzero) then
do i=1, m do i=1, m
y(i,:) = dzero y(i,1:nc) = dzero
end do end do
else if (beta == done) then else if (beta == done) then
! Do nothing ! Do nothing
else if (beta == -done) then else if (beta == -done) then
do i=1, m do i=1, m
y(i,:) = -y(i,:) y(i,1:nc) = -y(i,1:nc)
end do end do
else else
do i=1, m do i=1, m
y(i,:) = beta*y(i,:) y(i,1:nc) = beta*y(i,1:nc)
end do end do
end if end if
if (alpha.eq.done) then if (alpha.eq.done) then
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir,:) = y(ir,:) + a%val(j)*x(i,:) y(ir,1:nc) = y(ir,1:nc) + val(j)*x(i,1:nc)
end do end do
enddo enddo
else if (alpha.eq.-done) then else if (alpha.eq.-done) then
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir,:) = y(ir,:) - a%val(j)*x(i,:) y(ir,1:nc) = y(ir,1:nc) - val(j)*x(i,1:nc)
end do end do
enddo enddo
else else
do i=1,n do i=1,n
do j=a%irp(i), a%irp(i+1)-1 do j=irp(i), irp(i+1)-1
ir = a%ja(j) ir = ja(j)
y(ir,:) = y(ir,:) + alpha*a%val(j)*x(i,:) y(ir,1:nc) = y(ir,1:nc) + alpha*val(j)*x(i,1:nc)
end do end do
enddo enddo
@ -525,22 +576,13 @@ subroutine d_csr_csmm_impl(alpha,a,x,beta,y,info,trans)
endif endif
if (a%is_unit()) then if (is_triangle.and.is_unit) then
do i=1, min(m,n) do i=1, min(m,n)
y(i,:) = y(i,:) + alpha*x(i,:) y(i,1:nc) = y(i,1:nc) + alpha*x(i,1:nc)
end do end do
end if end if
call psb_erractionrestore(err_act) end subroutine d_csr_csmm_inner
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine d_csr_csmm_impl end subroutine d_csr_csmm_impl
@ -601,7 +643,7 @@ subroutine d_csr_cssv_impl(alpha,a,x,beta,y,info,trans)
end if end if
if (beta == dzero) then if (beta == dzero) then
!!$ call inner_csrsv(tra,a,x,y)
call inner_csrsv(tra,a%is_lower(),a%is_unit(),a%get_nrows(),& call inner_csrsv(tra,a%is_lower(),a%is_unit(),a%get_nrows(),&
& a%irp,a%ja,a%val,x,y) & a%irp,a%ja,a%val,x,y)
if (alpha == done) then if (alpha == done) then
@ -623,7 +665,6 @@ subroutine d_csr_cssv_impl(alpha,a,x,beta,y,info,trans)
tmp(1:m) = x(1:m) tmp(1:m) = x(1:m)
call inner_csrsv(tra,a%is_lower(),a%is_unit(),a%get_nrows(),& call inner_csrsv(tra,a%is_lower(),a%is_unit(),a%get_nrows(),&
& a%irp,a%ja,a%val,tmp,y) & a%irp,a%ja,a%val,tmp,y)
!!$ call inner_csrsv(tra,a,tmp,y)
do i = 1, m do i = 1, m
y(i) = alpha*tmp(i) + beta*y(i) y(i) = alpha*tmp(i) + beta*y(i)
end do end do
@ -646,9 +687,7 @@ contains
subroutine inner_csrsv(tra,lower,unit,n,irp,ja,val,x,y) subroutine inner_csrsv(tra,lower,unit,n,irp,ja,val,x,y)
implicit none implicit none
logical, intent(in) :: tra,lower,unit logical, intent(in) :: tra,lower,unit
!!$ class(psbn_d_csr_sparse_mat), intent(in) :: a
integer, intent(in) :: irp(*), ja(*),n integer, intent(in) :: irp(*), ja(*),n
real(psb_dpk_), intent(in) :: val(*) real(psb_dpk_), intent(in) :: val(*)
real(psb_dpk_), intent(in) :: x(*) real(psb_dpk_), intent(in) :: x(*)
real(psb_dpk_), intent(out) :: y(*) real(psb_dpk_), intent(out) :: y(*)
@ -811,7 +850,6 @@ subroutine d_csr_cssm_impl(alpha,a,x,beta,y,info,trans)
end if end if
if (beta == dzero) then if (beta == dzero) then
!!$ call inner_csrsm(tra,a,x,y,info)
call inner_csrsm(tra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,& call inner_csrsm(tra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,&
& a%irp,a%ja,a%val,x,size(x,1),y,size(y,1),info) & a%irp,a%ja,a%val,x,size(x,1),y,size(y,1),info)
do i = 1, m do i = 1, m
@ -828,7 +866,6 @@ subroutine d_csr_cssm_impl(alpha,a,x,beta,y,info,trans)
tmp(1:m,:) = x(1:m,1:nc) tmp(1:m,:) = x(1:m,1:nc)
call inner_csrsm(tra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,& call inner_csrsm(tra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,&
& a%irp,a%ja,a%val,tmp,size(tmp,1),y,size(y,1),info) & a%irp,a%ja,a%val,tmp,size(tmp,1),y,size(y,1),info)
!!$ call inner_csrsm(tra,a,tmp,y,info)
do i = 1, m do i = 1, m
y(i,1:nc) = alpha*tmp(i,1:nc) + beta*y(i,1:nc) y(i,1:nc) = alpha*tmp(i,1:nc) + beta*y(i,1:nc)
end do end do
@ -1496,7 +1533,7 @@ subroutine d_cp_csr_to_coo_impl(a,b,info)
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = a%get_nzeros()
call b%allocate(nr,nr,nza) call b%allocate(nr,nc,nza)
do i=1, nr do i=1, nr
do j=a%irp(i),a%irp(i+1)-1 do j=a%irp(i),a%irp(i+1)-1
@ -1513,6 +1550,7 @@ subroutine d_cp_csr_to_coo_impl(a,b,info)
call b%set_state(a%get_state()) call b%set_state(a%get_state())
call b%set_triangle(a%is_triangle()) call b%set_triangle(a%is_triangle())
call b%set_upper(a%is_upper()) call b%set_upper(a%is_upper())
call b%set_unit(a%is_unit())
call b%fix(info) call b%fix(info)
@ -1551,6 +1589,7 @@ subroutine d_mv_csr_to_coo_impl(a,b,info)
call b%set_state(a%get_state()) call b%set_state(a%get_state())
call b%set_triangle(a%is_triangle()) call b%set_triangle(a%is_triangle())
call b%set_upper(a%is_upper()) call b%set_upper(a%is_upper())
call b%set_unit(a%is_unit())
call move_alloc(a%ja,b%ja) call move_alloc(a%ja,b%ja)
call move_alloc(a%val,b%val) call move_alloc(a%val,b%val)
@ -1603,6 +1642,7 @@ subroutine d_mv_csr_from_coo_impl(a,b,info)
call a%set_state(b%get_state()) call a%set_state(b%get_state())
call a%set_triangle(b%is_triangle()) call a%set_triangle(b%is_triangle())
call a%set_upper(b%is_upper()) call a%set_upper(b%is_upper())
call a%set_unit(b%is_unit())
! Dirty trick: call move_alloc to have the new data allocated just once. ! Dirty trick: call move_alloc to have the new data allocated just once.
call move_alloc(b%ia,itemp) call move_alloc(b%ia,itemp)
call move_alloc(b%ja,a%ja) call move_alloc(b%ja,a%ja)

@ -941,6 +941,7 @@ contains
call a%set_ncols(n) call a%set_ncols(n)
call a%set_bld() call a%set_bld()
call a%set_triangle(.false.) call a%set_triangle(.false.)
call a%set_unit(.false.)
end if end if
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

Loading…
Cancel
Save