Merged ovtrans -r 2649:2653 into trunk.

psblas3-type-indexed
Salvatore Filippone 17 years ago
parent 47411166b0
commit b275558225

@ -90,17 +90,21 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
integer :: ictxt, np, me,& integer :: ictxt, np, me,&
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,& & err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,&
& m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,& & m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,&
& i, ib, ib1 & i, ib, ib1, ip
integer, parameter :: nb=4 integer, parameter :: nb=4
real(kind(1.d0)),pointer :: xp(:,:), yp(:,:), iwork(:) real(kind(1.d0)), pointer :: xp(:,:), yp(:,:), iwork(:)
real(kind(1.d0)), allocatable :: wrkt(:,:)
character :: trans_ character :: trans_
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer :: debug_level, debug_unit
name='psb_dspmm' name='psb_dspmm'
if(psb_get_errstatus() /= 0) return if(psb_get_errstatus() /= 0) return
info=0 info=0
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
ictxt=psb_cd_get_context(desc_a) ictxt=psb_cd_get_context(desc_a)
@ -163,8 +167,7 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
! check for presence/size of a work area ! check for presence/size of a work area
liwork= 2*ncol liwork= 2*ncol
if (a%pr(1) /= 0) liwork = liwork + n * ik
if (a%pl(1) /= 0) liwork = liwork + m * ik
if (present(work)) then if (present(work)) then
if (size(work) >= liwork) then if (size(work) >= liwork) then
aliw =.false. aliw =.false.
@ -264,6 +267,7 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
end if end if
else else
! Matrix is transposed ! Matrix is transposed
if((ja /= iy).or.(ia /= ix)) then if((ja /= iy).or.(ia /= ix)) then
! this case is not yet implemented ! this case is not yet implemented
@ -272,11 +276,6 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if(desc_a%ovrlap_elem(1) /= -1) then
info = 3070
call psb_errpush(info,name)
goto 9999
end if
! checking for vectors correctness ! checking for vectors correctness
call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx) call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx)
@ -296,6 +295,84 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if (desc_a%ovrlap_elem(1) /= -1) then
!
! Non-empty overlap, need special care.
! This is a bit kludgy, but works; is there a better way?
!
allocate(wrkt(ncol,ik),stat=info)
if (info /= 0) then
info=4010
ch_err='Allocate'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! First do a product, with the overlap entries zeroed out.
!
wrkt(1:nrow,1:ik) = x(1:nrow,1:ik)
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = dzero
ip = ip+2
end do
y(nrow+1:ncol,:)=dzero
! local Matrix-vector product
call psb_csmm(alpha,a,wrkt(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (doswap_)&
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
& ik,done,y(:,1:ik),desc_a,iwork,info)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
if(info /= 0) then
info = 4010
ch_err='PSI_dSwapTran'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! Then do a local product with only the overlap entries,
! and don't do a remote update.
!
wrkt(1:nrow,1:ik) = dzero
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = &
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik)
ip = ip+2
end do
! y(nrow+1:ncol,:)=dzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,wrkt(:,1:ik),done,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
else
!
! Empty overlap, go ahead
!
y(iiy+nrow+1-1:iiy+ncol,1:ik)=dzero y(iiy+nrow+1-1:iiy+ncol,1:ik)=dzero
! local Matrix-vector product ! local Matrix-vector product
@ -323,8 +400,9 @@ subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,&
end if end if
end if end if
end if
if(aliw) deallocate(iwork) if (aliw) deallocate(iwork)
nullify(iwork) nullify(iwork)
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -425,7 +503,7 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
integer :: ictxt, np, me,& integer :: ictxt, np, me,&
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, & & err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, &
& m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,& & m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,&
& ib & ib, ip
integer, parameter :: nb=4 integer, parameter :: nb=4
real(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:) real(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:)
character :: trans_ character :: trans_
@ -486,8 +564,6 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
iwork => null() iwork => null()
! check for presence/size of a work area ! check for presence/size of a work area
liwork= 2*ncol liwork= 2*ncol
if (a%pr(1) /= 0) liwork = liwork + n * ik
if (a%pl(1) /= 0) liwork = liwork + m * ik
if (present(work)) then if (present(work)) then
if (size(work) >= liwork) then if (size(work) >= liwork) then
@ -574,12 +650,6 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if(desc_a%ovrlap_elem(1) /= -1) then
info = 3070
call psb_errpush(info,name)
goto 9999
end if
! checking for vectors correctness ! checking for vectors correctness
call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx) call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx)
if (info == 0)& if (info == 0)&
@ -598,8 +668,80 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
xp => x(iix:lldx) xp => x(1:lldx)
yp => y(iiy:lldy) yp => y(1:lldy)
if (desc_a%ovrlap_elem(1) /= -1) then
!
! Non-empty overlap, need special care.
! This is a bit kludgy, but works; is there a better way?
!
!
! First do a product, with the overlap entries zeroed out.
!
iwork(1:nrow) = x(1:nrow)
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) = dzero
ip = ip+2
end do
yp(nrow+1:ncol)=dzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,beta,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (doswap_) &
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
& done,yp,desc_a,iwork,info)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
if(info /= 0) then
info = 4010
ch_err='PSI_dSwapTran'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! Then do a local product with only the overlap entries,
! and don't do a remote update.
!
iwork(1:nrow) = dzero
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) =&
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_))
ip = ip+2
end do
!yp(nrow+1:ncol)=dzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,done,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
else
!
! Empty overlap, go ahead
!
yp(nrow+1:ncol)=dzero yp(nrow+1:ncol)=dzero
if (debug_level >= psb_debug_comp_) & if (debug_level >= psb_debug_comp_) &
@ -610,7 +752,7 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
& write(debug_unit,*) me,' ',trim(name),' csmm ', info & write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then if(info /= 0) then
info = 4010 info = 4010
ch_err='dcsmm' ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
@ -626,7 +768,7 @@ subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
end if
end if end if
if (aliw) deallocate(iwork,stat=info) if (aliw) deallocate(iwork,stat=info)

@ -37,7 +37,7 @@
! !
! sub( Y ) := alpha * Pr * A' * Pr * sub( X ) + beta * sub( Y ), ! sub( Y ) := alpha * Pr * A' * Pr * sub( X ) + beta * sub( Y ),
! !
! ! where:
! !
! sub( X ) denotes: X(1:N,JX:JX+K-1), ! sub( X ) denotes: X(1:N,JX:JX+K-1),
! !
@ -90,17 +90,21 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
integer :: ictxt, np, me,& integer :: ictxt, np, me,&
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,& & err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, ijx, ijy,&
& m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,& & m, nrow, ncol, lldx, lldy, liwork, iiy, jjy,&
& i, ib, ib1 & i, ib, ib1,ip
integer, parameter :: nb=4 integer, parameter :: nb=4
complex(kind(1.d0)),pointer :: xp(:,:), yp(:,:), iwork(:) complex(kind(1.d0)),pointer :: xp(:,:), yp(:,:), iwork(:)
complex(kind(1.d0)), allocatable :: wrkt(:,:)
character :: trans_ character :: trans_
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer :: debug_level, debug_unit
name='psb_zspmm' name='psb_zspmm'
if(psb_get_errstatus() /= 0) return if(psb_get_errstatus() /= 0) return
info=0 info=0
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
debug_unit = psb_get_debug_unit()
debug_level = psb_get_debug_level()
ictxt=psb_cd_get_context(desc_a) ictxt=psb_cd_get_context(desc_a)
@ -163,8 +167,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
! check for presence/size of a work area ! check for presence/size of a work area
liwork= 2*ncol liwork= 2*ncol
if (a%pr(1) /= 0) liwork = liwork + n * ik
if (a%pl(1) /= 0) liwork = liwork + m * ik
if (present(work)) then if (present(work)) then
if (size(work) >= liwork) then if (size(work) >= liwork) then
aliw =.false. aliw =.false.
@ -264,6 +267,7 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
end if end if
else else
! Matrix is transposed ! Matrix is transposed
if((ja /= iy).or.(ia /= ix)) then if((ja /= iy).or.(ia /= ix)) then
! this case is not yet implemented ! this case is not yet implemented
@ -272,11 +276,6 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if(desc_a%ovrlap_elem(1) /= -1) then
info = 3070
call psb_errpush(info,name)
goto 9999
end if
! checking for vectors correctness ! checking for vectors correctness
call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx) call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_a,info,iix,jjx)
@ -296,6 +295,83 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if (desc_a%ovrlap_elem(1) /= -1) then
!
! Non-empty overlap, need special care.
! This is a bit kludgy, but works; is there a better way?
!
allocate(wrkt(ncol,ik),stat=info)
if (info /= 0) then
info=4010
ch_err='Allocate'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! First do a product, with the overlap entries zeroed out.
!
wrkt(1:nrow,1:ik) = x(1:nrow,1:ik)
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = zzero
ip = ip+2
end do
y(nrow+1:ncol,:)=zzero
! local Matrix-vector product
call psb_csmm(alpha,a,wrkt(:,1:ik),beta,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (doswap_)&
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
& ik,zone,y(:,1:ik),desc_a,iwork,info)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
if(info /= 0) then
info = 4010
ch_err='PSI_dSwapTran'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! Then do a local product with only the overlap entries,
! and don't do a remote update.
!
wrkt(1:nrow,1:ik) = zzero
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
wrkt(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik) = &
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_),1:ik)
ip = ip+2
end do
! y(nrow+1:ncol,:)=zzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,wrkt(:,1:ik),zone,y(:,1:ik),info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
else
!
! Empty overlap, go ahead
!
y(iiy+nrow+1-1:iiy+ncol,1:ik)=zzero y(iiy+nrow+1-1:iiy+ncol,1:ik)=zzero
! local Matrix-vector product ! local Matrix-vector product
@ -323,8 +399,9 @@ subroutine psb_zspmm(alpha,a,x,beta,y,desc_a,info,&
end if end if
end if end if
end if
if(aliw) deallocate(iwork) if (aliw) deallocate(iwork)
nullify(iwork) nullify(iwork)
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -425,7 +502,7 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
integer :: ictxt, np, me,& integer :: ictxt, np, me,&
& err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, & & err_act, n, iix, jjx, ia, ja, iia, jja, ix, iy, ik, &
& m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,& & m, nrow, ncol, lldx, lldy, liwork, jx, jy, iiy, jjy,&
& ib & ib, ip
integer, parameter :: nb=4 integer, parameter :: nb=4
complex(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:) complex(kind(1.d0)),pointer :: iwork(:), xp(:), yp(:)
character :: trans_ character :: trans_
@ -486,8 +563,6 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
iwork => null() iwork => null()
! check for presence/size of a work area ! check for presence/size of a work area
liwork= 2*ncol liwork= 2*ncol
if (a%pr(1) /= 0) liwork = liwork + n * ik
if (a%pl(1) /= 0) liwork = liwork + m * ik
if (present(work)) then if (present(work)) then
if (size(work) >= liwork) then if (size(work) >= liwork) then
@ -574,12 +649,6 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
if(desc_a%ovrlap_elem(1) /= -1) then
info = 3070
call psb_errpush(info,name)
goto 9999
end if
! checking for vectors correctness ! checking for vectors correctness
call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx) call psb_chkvect(m,ik,size(x),ix,jx,desc_a,info,iix,jjx)
if (info == 0)& if (info == 0)&
@ -598,8 +667,80 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
goto 9999 goto 9999
end if end if
xp => x(iix:lldx) xp => x(1:lldx)
yp => y(iiy:lldy) yp => y(1:lldy)
if (desc_a%ovrlap_elem(1) /= -1) then
!
! Non-empty overlap, need special care.
! This is a bit kludgy, but works; is there a better way?
!
!
! First do a product, with the overlap entries zeroed out.
!
iwork(1:nrow) = x(1:nrow)
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) = zzero
ip = ip+2
end do
yp(nrow+1:ncol)=zzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,beta,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (doswap_) &
& call psi_swaptran(ior(psb_swap_send_,psb_swap_recv_),&
& zone,yp,desc_a,iwork,info)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' swaptran ', info
if(info /= 0) then
info = 4010
ch_err='PSI_dSwapTran'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
!
! Then do a local product with only the overlap entries,
! and don't do a remote update.
!
iwork(1:nrow) = zzero
ip = 1
do while(desc_a%ovrlap_elem(ip) /= -ione)
iwork(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_)) =&
& x(desc_a%ovrlap_elem(ip+psb_ovrlp_elem_))
ip = ip+2
end do
!yp(nrow+1:ncol)=zzero
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' checkvect ', info
! local Matrix-vector product
call psb_csmm(alpha,a,iwork,zone,yp,info,trans=trans_)
if (debug_level >= psb_debug_comp_) &
& write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then
info = 4010
ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
else
!
! Empty overlap, go ahead
!
yp(nrow+1:ncol)=zzero yp(nrow+1:ncol)=zzero
if (debug_level >= psb_debug_comp_) & if (debug_level >= psb_debug_comp_) &
@ -610,7 +751,7 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
& write(debug_unit,*) me,' ',trim(name),' csmm ', info & write(debug_unit,*) me,' ',trim(name),' csmm ', info
if(info /= 0) then if(info /= 0) then
info = 4010 info = 4010
ch_err='zcsmm' ch_err='psb_csmm'
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
@ -626,7 +767,7 @@ subroutine psb_zspmv(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name,a_err=ch_err) call psb_errpush(info,name,a_err=ch_err)
goto 9999 goto 9999
end if end if
end if
end if end if
if (aliw) deallocate(iwork,stat=info) if (aliw) deallocate(iwork,stat=info)

Loading…
Cancel
Save