Changed handling of scaling for transpose product in presence of

overlap: save/restore of only the overlap entries is faster than a
full copy.
psblas3-type-indexed
Salvatore Filippone 17 years ago
parent 47fe430a5f
commit 125078164c

@ -332,6 +332,18 @@ module psi_mod
& psi_zovrl_updr1, psi_zovrl_updr2
end interface
interface psi_ovrl_save
module procedure psi_iovrl_saver1, psi_iovrl_saver2,&
& psi_dovrl_saver1, psi_dovrl_saver2,&
& psi_zovrl_saver1, psi_zovrl_saver2
end interface
interface psi_ovrl_restore
module procedure psi_iovrl_restrr1, psi_iovrl_restrr2,&
& psi_dovrl_restrr1, psi_dovrl_restrr2,&
& psi_zovrl_restrr1, psi_zovrl_restrr2
end interface
interface psi_gth
module procedure psi_igthm, psi_igthv,&
& psi_dgthm, psi_dgthv,&
@ -960,6 +972,656 @@ contains
end subroutine psi_iovrl_updr2
subroutine psi_dovrl_saver1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
real(kind(1.d0)), intent(inout) :: x(:)
real(kind(1.d0)), allocatable :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_dovrl_saver1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
call psb_realloc(isz,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i) = x(idx)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_dovrl_saver1
subroutine psi_dovrl_restrr1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
real(kind(1.d0)), intent(inout) :: x(:)
real(kind(1.d0)) :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_dovrl_restrr1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx) = xs(i)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_dovrl_restrr1
subroutine psi_dovrl_saver2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
real(kind(1.d0)), intent(inout) :: x(:,:)
real(kind(1.d0)), allocatable :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz, nc
character(len=20) :: name, ch_err
name='psi_dovrl_saver2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
nc = size(x,2)
call psb_realloc(isz,nc,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i,:) = x(idx,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_dovrl_saver2
subroutine psi_dovrl_restrr2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
real(kind(1.d0)), intent(inout) :: x(:,:)
real(kind(1.d0)) :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_dovrl_restrr2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
if (size(x,2) /= size(xs,2)) then
info = 4001
call psb_errpush(info,name, a_err='Mismacth columns X vs XS')
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx,:) = xs(i,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_dovrl_restrr2
subroutine psi_zovrl_saver1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
complex(kind(1.d0)), intent(inout) :: x(:)
complex(kind(1.d0)), allocatable :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_zovrl_saver1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
call psb_realloc(isz,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i) = x(idx)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_zovrl_saver1
subroutine psi_zovrl_restrr1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
complex(kind(1.d0)), intent(inout) :: x(:)
complex(kind(1.d0)) :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_zovrl_restrr1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx) = xs(i)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_zovrl_restrr1
subroutine psi_zovrl_saver2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
complex(kind(1.d0)), intent(inout) :: x(:,:)
complex(kind(1.d0)), allocatable :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz, nc
character(len=20) :: name, ch_err
name='psi_zovrl_saver2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
nc = size(x,2)
call psb_realloc(isz,nc,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i,:) = x(idx,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_zovrl_saver2
subroutine psi_zovrl_restrr2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
complex(kind(1.d0)), intent(inout) :: x(:,:)
complex(kind(1.d0)) :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_zovrl_restrr2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
if (size(x,2) /= size(xs,2)) then
info = 4001
call psb_errpush(info,name, a_err='Mismacth columns X vs XS')
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx,:) = xs(i,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_zovrl_restrr2
subroutine psi_iovrl_saver1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
integer, intent(inout) :: x(:)
integer, allocatable :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_iovrl_saver1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
call psb_realloc(isz,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i) = x(idx)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_iovrl_saver1
subroutine psi_iovrl_restrr1(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
integer, intent(inout) :: x(:)
integer :: xs(:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_iovrl_restrr1'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx) = xs(i)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_iovrl_restrr1
subroutine psi_iovrl_saver2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_realloc_mod
use psb_penv_mod
implicit none
integer, intent(inout) :: x(:,:)
integer, allocatable :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz, nc
character(len=20) :: name, ch_err
name='psi_iovrl_saver2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
nc = size(x,2)
call psb_realloc(isz,nc,xs,info)
if (info /= 0) then
info = 4000
call psb_errpush(info,name)
goto 9999
endif
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
xs(i,:) = x(idx,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_iovrl_saver2
subroutine psi_iovrl_restrr2(x,xs,desc_a,info)
use psb_descriptor_type
use psb_const_mod
use psb_error_mod
use psb_penv_mod
implicit none
integer, intent(inout) :: x(:,:)
integer :: xs(:,:)
type(psb_desc_type), intent(in) :: desc_a
integer, intent(out) :: info
! locals
integer :: ictxt, np, me, err_act, i, idx, isz
character(len=20) :: name, ch_err
name='psi_iovrl_restrr2'
if (psb_get_errstatus() /= 0) return
info = 0
call psb_erractionsave(err_act)
ictxt = psb_cd_get_context(desc_a)
call psb_info(ictxt, me, np)
if (np == -1) then
info = 2010
call psb_errpush(info,name)
goto 9999
endif
if (size(x,2) /= size(xs,2)) then
info = 4001
call psb_errpush(info,name, a_err='Mismacth columns X vs XS')
goto 9999
endif
isz = size(desc_a%ovrlap_elem,1)
do i=1, isz
idx = desc_a%ovrlap_elem(i,1)
x(idx,:) = xs(i,:)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act == psb_act_abort_) then
call psb_error(ictxt)
return
end if
return
end subroutine psi_iovrl_restrr2
subroutine psi_dgthm(n,k,idx,x,y)
use psb_const_mod

@ -93,7 +93,7 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
& i, ib, ib1, ip, idx
integer, parameter :: nb=4
real(kind(1.d0)), pointer :: xp(:,:), yp(:,:), iwork(:)
real(kind(1.d0)), allocatable :: wrkt(:,:)
real(kind(1.d0)), allocatable :: xvsave(:,:)
character :: trans_
character(len=20) :: name, ch_err
logical :: aliw, doswap_
@ -299,30 +299,20 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
! Non-empty overlap, need a buffer to hold
! the entries updated with average operator.
!
allocate(wrkt(ncol,ik),stat=info)
if (info /= 0) then
info=4010
ch_err='Allocate'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
wrkt(1:nrow,1:ik) = x(1:nrow,1:ik)
wrkt(nrow+1:ncol,1:ik) = dzero
call psi_ovrl_save(x(:,1:ik),xvsave,desc_a,info)
if (info == 0) call psi_ovrl_upd(x,desc_a,psb_avg_,info)
y(nrow+1:ncol,1:ik) = dzero
call psi_ovrl_upd(wrkt,desc_a,psb_avg_,info)
call psb_csmm(alpha,a,wrkt(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (info == 0) call psb_csmm(alpha,a,x(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
if (info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (info == 0) call psi_ovrl_restore(x,xvsave,desc_a,info)
if (doswap_)then
call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
@ -445,7 +435,8 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
& m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,&
& ib, ip, idx
integer, parameter :: nb=4
real(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:)
real(kind(1.d0)), pointer :: iwork(:), xp(:), yp(:)
real(kind(1.d0)), allocatable :: xvsave(:)
character :: trans_
character(len=20) :: name, ch_err
logical :: aliw, doswap_
@ -615,16 +606,17 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
! Non-empty overlap, need a buffer to hold
! the entries updated with average operator.
!
iwork(1:nrow) = x(1:nrow)
iwork(nrow+1:ncol) = dzero
yp(nrow+1:ncol) = dzero
call psi_ovrl_upd(iwork,desc_a,psb_avg_,info)
call psi_ovrl_save(x,xvsave,desc_a,info)
if (info == 0) call psi_ovrl_upd(x,desc_a,psb_avg_,info)
yp(nrow+1:ncol) = dzero
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,beta,yp,info,trans=trans_)
if (info == 0) call psb_csmm(alpha,a,x,beta,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if (info == 0) call psi_ovrl_restore(x,xvsave,desc_a,info)
if (info /= 0) then
info = 4010
ch_err='psb_csmm'
@ -635,7 +627,6 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
if (doswap_) then
call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
& done,yp,desc_a,iwork,info)
if (info == 0) call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& done,yp,desc_a,iwork,info,data=psb_comm_ovr_)

@ -93,7 +93,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
& i, ib, ib1, ip, idx
integer, parameter :: nb=4
complex(kind(1.d0)), pointer :: xp(:,:), yp(:,:), iwork(:)
complex(kind(1.d0)), allocatable :: wrkt(:,:)
complex(kind(1.d0)), allocatable :: xvsave(:,:)
character :: trans_
character(len=20) :: name, ch_err
logical :: aliw, doswap_
@ -299,30 +299,20 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
! Non-empty overlap, need a buffer to hold
! the entries updated with average operator.
!
allocate(wrkt(ncol,ik),stat=info)
if (info /= 0) then
info=4010
ch_err='Allocate'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
wrkt(1:nrow,1:ik) = x(1:nrow,1:ik)
wrkt(nrow+1:ncol,1:ik) = zzero
call psi_ovrl_save(x(:,1:ik),xvsave,desc_a,info)
if (info == 0) call psi_ovrl_upd(x,desc_a,psb_avg_,info)
y(nrow+1:ncol,1:ik) = zzero
call psi_ovrl_upd(wrkt,desc_a,psb_avg_,info)
call psb_csmm(alpha,a,wrkt(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (info == 0) call psb_csmm(alpha,a,x(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
if (info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (info == 0) call psi_ovrl_restore(x,xvsave,desc_a,info)
if (doswap_)then
call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
@ -446,6 +436,7 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
& ib, ip, idx
integer, parameter :: nb=4
complex(kind(1.d0)), pointer :: iwork(:), xp(:), yp(:)
complex(kind(1.d0)), allocatable :: xvsave(:)
character :: trans_
character(len=20) :: name, ch_err
logical :: aliw, doswap_
@ -615,16 +606,17 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
! Non-empty overlap, need a buffer to hold
! the entries updated with average operator.
!
iwork(1:nrow) = x(1:nrow)
iwork(nrow+1:ncol) = zzero
yp(nrow+1:ncol) = zzero
call psi_ovrl_upd(iwork,desc_a,psb_avg_,info)
call psi_ovrl_save(x,xvsave,desc_a,info)
if (info == 0) call psi_ovrl_upd(x,desc_a,psb_avg_,info)
yp(nrow+1:ncol) = zzero
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,beta,yp,info,trans=trans_)
if (info == 0) call psb_csmm(alpha,a,x,beta,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if (info == 0) call psi_ovrl_restore(x,xvsave,desc_a,info)
if (info /= 0) then
info = 4010
ch_err='psb_csmm'

Loading…
Cancel
Save