|
|
@ -37,7 +37,7 @@
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! sub( Y ) := alpha * Pr * A' * Pr * sub( X ) + beta * sub( Y ),
|
|
|
|
! sub( Y ) := alpha * Pr * A' * Pr * sub( X ) + beta * sub( Y ),
|
|
|
|
!
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! where:
|
|
|
|
!
|
|
|
|
!
|
|
|
|
! sub( X ) denotes: X(1:N,JX:JX+K-1),
|
|
|
|
! sub( X ) denotes: X(1:N,JX:JX+K-1),
|
|
|
|
!
|
|
|
|
!
|
|
|
@ -90,17 +90,21 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
integer :: ictxt, np, me,&
|
|
|
|
integer :: ictxt, np, me,&
|
|
|
|
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,&
|
|
|
|
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,&
|
|
|
|
& m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,&
|
|
|
|
& m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,&
|
|
|
|
& i, ib, ib1
|
|
|
|
& i, ib, ib1,ip
|
|
|
|
integer, parameter :: nb=4
|
|
|
|
integer, parameter :: nb=4
|
|
|
|
complex(kind(1.d0)),pointer :: xp(:,:), yp(:,:), iwork(:)
|
|
|
|
complex(kind(1.d0)),pointer :: xp(:,:), yp(:,:), iwork(:)
|
|
|
|
|
|
|
|
complex(kind(1.d0)), allocatable :: wrkt(:,:)
|
|
|
|
character :: trans_
|
|
|
|
character :: trans_
|
|
|
|
character(len=20) :: name, ch_err
|
|
|
|
character(len=20) :: name, ch_err
|
|
|
|
logical :: aliw, doswap_
|
|
|
|
logical :: aliw, doswap_
|
|
|
|
|
|
|
|
integer :: debug_level, debug_unit
|
|
|
|
|
|
|
|
|
|
|
|
name='psb_zspmm'
|
|
|
|
name='psb_zspmm'
|
|
|
|
if(psb_get_errstatus() /= 0) return
|
|
|
|
if(psb_get_errstatus() /= 0) return
|
|
|
|
info=0
|
|
|
|
info=0
|
|
|
|
call psb_erractionsave(err_act)
|
|
|
|
call psb_erractionsave(err_act)
|
|
|
|
|
|
|
|
debug_unit = psb_get_debug_unit()
|
|
|
|
|
|
|
|
debug_level = psb_get_debug_level()
|
|
|
|
|
|
|
|
|
|
|
|
ictxt=psb_cd_get_context(desc_a)
|
|
|
|
ictxt=psb_cd_get_context(desc_a)
|
|
|
|
|
|
|
|
|
|
|
@ -163,8 +167,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
|
|
|
|
|
|
|
|
! check for presence/size of a work area
|
|
|
|
! check for presence/size of a work area
|
|
|
|
liwork= 2*ncol
|
|
|
|
liwork= 2*ncol
|
|
|
|
if (a%pr(1) /= 0) liwork = liwork + n * ik
|
|
|
|
|
|
|
|
if (a%pl(1) /= 0) liwork = liwork + m * ik
|
|
|
|
|
|
|
|
if (present(work)) then
|
|
|
|
if (present(work)) then
|
|
|
|
if (size(work) >= liwork) then
|
|
|
|
if (size(work) >= liwork) then
|
|
|
|
aliw =.false.
|
|
|
|
aliw =.false.
|
|
|
@ -264,6 +267,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
else
|
|
|
|
else
|
|
|
|
|
|
|
|
|
|
|
|
! Matrix is transposed
|
|
|
|
! Matrix is transposed
|
|
|
|
if((ja /= iy).or.(ia /= ix)) then
|
|
|
|
if((ja /= iy).or.(ia /= ix)) then
|
|
|
|
! this case is not yet implemented
|
|
|
|
! this case is not yet implemented
|
|
|
@ -272,11 +276,6 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
if(desc_a%ovrlap_elem(1) /= -1) then
|
|
|
|
|
|
|
|
info = 3070
|
|
|
|
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! checking for vectors correctness
|
|
|
|
! checking for vectors correctness
|
|
|
|
call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx)
|
|
|
|
call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx)
|
|
|
@ -296,6 +295,83 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (desc_a%ovrlap_elem(1) /= -1) then
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Non-empty overlap, need special care.
|
|
|
|
|
|
|
|
! This is a bit kludgy, but works; is there a better way?
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
allocate(wrkt(ncol,ik),stat=info)
|
|
|
|
|
|
|
|
if (info /= 0) then
|
|
|
|
|
|
|
|
info=4010
|
|
|
|
|
|
|
|
ch_err='Allocate'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! First do a product, with the overlap entries zeroed out.
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
wrkt(1:nrow,1:ik) = x(1:nrow,1:ik)
|
|
|
|
|
|
|
|
ip = 1
|
|
|
|
|
|
|
|
do while(desc_a%ovrlap_elem(ip) /= -ione)
|
|
|
|
|
|
|
|
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = zzero
|
|
|
|
|
|
|
|
ip = ip+2
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
y(nrow+1:ncol,:)=zzero
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! local Matrix-vector product
|
|
|
|
|
|
|
|
call psb_csmm(alpha,a,wrkt(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='psb_csmm'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (doswap_)&
|
|
|
|
|
|
|
|
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
|
|
|
|
|
|
|
|
& ik,zone,y(:,1:ik),desc_a,iwork,info)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='PSI_dSwapTran'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Then do a local product with only the overlap entries,
|
|
|
|
|
|
|
|
! and don't do a remote update.
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
wrkt(1:nrow,1:ik) = zzero
|
|
|
|
|
|
|
|
ip = 1
|
|
|
|
|
|
|
|
do while(desc_a%ovrlap_elem(ip) /= -ione)
|
|
|
|
|
|
|
|
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = &
|
|
|
|
|
|
|
|
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik)
|
|
|
|
|
|
|
|
ip = ip+2
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
! y(nrow+1:ncol,:)=zzero
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
|
|
|
|
|
|
|
|
! local Matrix-vector product
|
|
|
|
|
|
|
|
call psb_csmm(alpha,a,wrkt(:,1:ik),zone,y(:,1:ik),info,trans=trans_)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='psb_csmm'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Empty overlap, go ahead
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
|
|
|
|
y(iiy+nrow+1-1:iiy+ncol,1:ik)=zzero
|
|
|
|
y(iiy+nrow+1-1:iiy+ncol,1:ik)=zzero
|
|
|
|
|
|
|
|
|
|
|
|
! local Matrix-vector product
|
|
|
|
! local Matrix-vector product
|
|
|
@ -323,6 +399,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
if (aliw) deallocate(iwork)
|
|
|
|
if (aliw) deallocate(iwork)
|
|
|
|
nullify(iwork)
|
|
|
|
nullify(iwork)
|
|
|
@ -425,7 +502,7 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
integer :: ictxt, np, me,&
|
|
|
|
integer :: ictxt, np, me,&
|
|
|
|
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, &
|
|
|
|
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, &
|
|
|
|
& m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,&
|
|
|
|
& m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,&
|
|
|
|
& ib
|
|
|
|
& ib, ip
|
|
|
|
integer, parameter :: nb=4
|
|
|
|
integer, parameter :: nb=4
|
|
|
|
complex(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:)
|
|
|
|
complex(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:)
|
|
|
|
character :: trans_
|
|
|
|
character :: trans_
|
|
|
@ -486,8 +563,6 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
iwork => null()
|
|
|
|
iwork => null()
|
|
|
|
! check for presence/size of a work area
|
|
|
|
! check for presence/size of a work area
|
|
|
|
liwork= 2*ncol
|
|
|
|
liwork= 2*ncol
|
|
|
|
if (a%pr(1) /= 0) liwork = liwork + n * ik
|
|
|
|
|
|
|
|
if (a%pl(1) /= 0) liwork = liwork + m * ik
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (present(work)) then
|
|
|
|
if (present(work)) then
|
|
|
|
if (size(work) >= liwork) then
|
|
|
|
if (size(work) >= liwork) then
|
|
|
@ -574,12 +649,6 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
if(desc_a%ovrlap_elem(1) /= -1) then
|
|
|
|
|
|
|
|
info = 3070
|
|
|
|
|
|
|
|
call psb_errpush(info,name)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
! checking for vectors correctness
|
|
|
|
! checking for vectors correctness
|
|
|
|
call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx)
|
|
|
|
call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx)
|
|
|
|
if (info == 0)&
|
|
|
|
if (info == 0)&
|
|
|
@ -598,19 +667,34 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
xp => x(iix:lldx)
|
|
|
|
xp => x(1:lldx)
|
|
|
|
yp => y(iiy:lldy)
|
|
|
|
yp => y(1:lldy)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (desc_a%ovrlap_elem(1) /= -1) then
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Non-empty overlap, need special care.
|
|
|
|
|
|
|
|
! This is a bit kludgy, but works; is there a better way?
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! First do a product, with the overlap entries zeroed out.
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
iwork(1:nrow) = x(1:nrow)
|
|
|
|
|
|
|
|
ip = 1
|
|
|
|
|
|
|
|
do while(desc_a%ovrlap_elem(ip) /= -ione)
|
|
|
|
|
|
|
|
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) = zzero
|
|
|
|
|
|
|
|
ip = ip+2
|
|
|
|
|
|
|
|
end do
|
|
|
|
yp(nrow+1:ncol)=zzero
|
|
|
|
yp(nrow+1:ncol)=zzero
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
|
|
|
|
! local Matrix-vector product
|
|
|
|
! local Matrix-vector product
|
|
|
|
call psb_csmm(alpha,a,xp,beta,yp,info,trans=trans_)
|
|
|
|
call psb_csmm(alpha,a,iwork,beta,yp,info,trans=trans_)
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
if(info /= 0) then
|
|
|
|
if(info /= 0) then
|
|
|
|
info = 4010
|
|
|
|
info = 4010
|
|
|
|
ch_err='zcsmm'
|
|
|
|
ch_err='psb_csmm'
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
@ -627,6 +711,63 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
|
|
|
|
goto 9999
|
|
|
|
goto 9999
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Then do a local product with only the overlap entries,
|
|
|
|
|
|
|
|
! and don't do a remote update.
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
iwork(1:nrow) = zzero
|
|
|
|
|
|
|
|
ip = 1
|
|
|
|
|
|
|
|
do while(desc_a%ovrlap_elem(ip) /= -ione)
|
|
|
|
|
|
|
|
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) =&
|
|
|
|
|
|
|
|
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_))
|
|
|
|
|
|
|
|
ip = ip+2
|
|
|
|
|
|
|
|
end do
|
|
|
|
|
|
|
|
!yp(nrow+1:ncol)=zzero
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
|
|
|
|
|
|
|
|
! local Matrix-vector product
|
|
|
|
|
|
|
|
call psb_csmm(alpha,a,iwork,zone,yp,info,trans=trans_)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='psb_csmm'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
else
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
! Empty overlap, go ahead
|
|
|
|
|
|
|
|
!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
yp(nrow+1:ncol)=zzero
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
|
|
|
|
|
|
|
|
! local Matrix-vector product
|
|
|
|
|
|
|
|
call psb_csmm(alpha,a,xp,beta,yp,info,trans=trans_)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='psb_csmm'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (doswap_)&
|
|
|
|
|
|
|
|
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
|
|
|
|
|
|
|
|
& zone,yp,desc_a,iwork,info)
|
|
|
|
|
|
|
|
if (debug_level >= psb_debug_comp_) &
|
|
|
|
|
|
|
|
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
|
|
|
|
|
|
|
|
if(info /= 0) then
|
|
|
|
|
|
|
|
info = 4010
|
|
|
|
|
|
|
|
ch_err='PSI_dSwapTran'
|
|
|
|
|
|
|
|
call psb_errpush(info,name,a_err=ch_err)
|
|
|
|
|
|
|
|
goto 9999
|
|
|
|
|
|
|
|
end if
|
|
|
|
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
end if
|
|
|
|
|
|
|
|
|
|
|
|
if (aliw) deallocate(iwork,stat=info)
|
|
|
|
if (aliw) deallocate(iwork,stat=info)
|
|
|
|