diff --git a/krylov/Makefile b/krylov/Makefile index 754058e4..cfe5cbe5 100644 --- a/krylov/Makefile +++ b/krylov/Makefile @@ -6,7 +6,7 @@ LIBDIR=../lib OBJS=psb_krylov_mod.o psb_dcgstab.o psb_dcg.o psb_dcgs.o \ psb_dbicg.o psb_dcgstabl.o psb_dgmresr.o\ - psb_zcgstab.o psb_zcgs.o + psb_zcgstab.o psb_zcgs.o psb_zgmresr.o LIBMOD=psb_krylov_mod$(.mod) diff --git a/krylov/psb_dbicg.f90 b/krylov/psb_dbicg.f90 index bfa58a29..09a4ab23 100644 --- a/krylov/psb_dbicg.f90 +++ b/krylov/psb_dbicg.f90 @@ -98,7 +98,6 @@ subroutine psb_dbicg(a,prec,b,x,eps,desc_a,info,& real(kind(1.d0)) ::rerr integer ::litmax, liter, naux, m, mglob, it, itrace_,& & np,me, n_row, n_col, istop_, err_act - character ::diagl, diagu logical, parameter :: debug = .false. logical, parameter :: exchange=.true., noexchange=.false. integer, parameter :: irmax = 8 @@ -198,8 +197,6 @@ subroutine psb_dbicg(a,prec,b,x,eps,desc_a,info,& itrace_ = 0 end if - diagl = 'u' - diagu = 'u' itx = 0 if (istop_ == 1) then diff --git a/krylov/psb_dcg.f90 b/krylov/psb_dcg.f90 index dfa6623f..752c94fa 100644 --- a/krylov/psb_dcg.f90 +++ b/krylov/psb_dcg.f90 @@ -98,7 +98,6 @@ Subroutine psb_dcg(a,prec,b,x,eps,desc_a,info,& & sigma integer :: litmax, liter, istop_, naux, m, mglob, it, itx, itrace_,& & np,me, n_col, isvch, ich, ictxt, n_row,err_act, int_err(5) - character :: diagl, diagu logical, parameter :: exchange=.true., noexchange=.false. character(len=20) :: name diff --git a/krylov/psb_dcgs.f90 b/krylov/psb_dcgs.f90 index 2899f77c..ecc5e81a 100644 --- a/krylov/psb_dcgs.f90 +++ b/krylov/psb_dcgs.f90 @@ -95,7 +95,6 @@ Subroutine psb_dcgs(a,prec,b,x,eps,desc_a,info,& Real(Kind(1.d0)) :: rerr Integer :: litmax, naux, m, mglob, it, itrace_,int_err(5),& & np,me, n_row, n_col,istop_, err_act - Character ::diagl, diagu Logical, Parameter :: exchange=.True., noexchange=.False. Integer, Parameter :: irmax = 8 Integer :: itx, i, isvch, ich, ictxt @@ -193,8 +192,6 @@ Subroutine psb_dcgs(a,prec,b,x,eps,desc_a,info,& ! Ensure global coherence for convergence checks. call psb_set_coher(ictxt,isvch) - diagl = 'u' - diagu = 'u' itx = 0 if (istop_ == 1) then diff --git a/krylov/psb_dcgstab.F90 b/krylov/psb_dcgstab.F90 index aa90c89e..77227d65 100644 --- a/krylov/psb_dcgstab.F90 +++ b/krylov/psb_dcgstab.F90 @@ -95,7 +95,6 @@ Subroutine psb_dcgstab(a,prec,b,x,eps,desc_a,info,& Real(Kind(1.d0)) :: rerr Integer :: litmax, naux, m, mglob, it,itrace_,& & np,me, n_row, n_col - Character ::diagl, diagu Logical, Parameter :: debug = .false. Logical, Parameter :: exchange=.True., noexchange=.False., debug1 = .False. Integer, Parameter :: irmax = 8 @@ -204,9 +203,6 @@ Subroutine psb_dcgstab(a,prec,b,x,eps,desc_a,info,& itrace_ = 0 End If - diagl = 'U' - diagu = 'U' - ! Ensure global coherence for convergence checks. call psb_set_coher(ictxt,isvch) diff --git a/krylov/psb_dcgstabl.f90 b/krylov/psb_dcgstabl.f90 index 5a9a9749..850e2453 100644 --- a/krylov/psb_dcgstabl.f90 +++ b/krylov/psb_dcgstabl.f90 @@ -103,7 +103,6 @@ Subroutine psb_dcgstabl(a,prec,b,x,eps,desc_a,info,& Real(Kind(1.d0)) :: rerr Integer :: litmax, naux, m, mglob, it, itrace_,& & np,me, n_row, n_col, nl, err_act - Character ::diagl, diagu Logical, Parameter :: exchange=.True., noexchange=.False. Integer, Parameter :: irmax = 8 Integer :: itx, i, isvch, ich, ictxt,istop_,j, int_err(5) @@ -233,8 +232,6 @@ Subroutine psb_dcgstabl(a,prec,b,x,eps,desc_a,info,& goto 9999 End If - diagl = 'u' - diagu = 'u' itx = 0 restart: Do !!$ diff --git a/krylov/psb_dgmresr.f90 b/krylov/psb_dgmresr.f90 index c43d1ee4..20741afb 100644 --- a/krylov/psb_dgmresr.f90 +++ b/krylov/psb_dgmresr.f90 @@ -105,11 +105,10 @@ Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& !!$ local data Real(Kind(1.d0)), allocatable, target :: aux(:),w(:),w1(:), v(:,:) Real(Kind(1.d0)), allocatable :: c(:),s(:), h(:,:), rs(:),rst(:),xt(:) - Real(Kind(1.d0)) :: rerr, scal, gm + Real(Kind(1.d0)) :: rerr, scal, gm, rti, rti1 Integer ::litmax, liter, naux, m, mglob, it,k, itrace_,& & np,me, n_row, n_col, nl, int_err(5) - Character :: diagl, diagu - Logical, Parameter :: exchange=.True., noexchange=.False. + Logical, Parameter :: exchange=.True., noexchange=.False., use_drot=.true. Integer, Parameter :: irmax = 8 Integer :: itx, i, isvch, ich, ictxt,istop_, err_act Logical, Parameter :: debug = .false. @@ -227,8 +226,6 @@ Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& goto 9999 End If - diagl = 'u' - diagu = 'u' itx = 0 restart: Do @@ -253,6 +250,7 @@ Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& End If rs(1) = psb_genrm2(v(:,1),desc_a,info) + rs(2:) = dzero if (info.ne.0) Then info=4011 call psb_errpush(info,name) @@ -310,21 +308,34 @@ Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& h(i+1,i) = psb_genrm2(w,desc_a,info) scal=done/h(i+1,i) call psb_geaxpby(scal,w,dzero,v(:,i+1),desc_a,info) - do k=2,i - dt = h(k-1,i) - h(k-1,i) = c(k-1)*dt + s(k-1)*h(k,i) - h(k,i) = -s(k-1)*dt + c(k-1)*h(k,i) - enddo - gm = safe_dn2(h(i,i),h(i+1,i)) - if (debug) write(0,*) 'GM : ',gm - gm = max(gm,epstol) - - c(i) = h(i,i)/gm - s(i) = h(i+1,i)/gm - rs(i+1) = -s(i)*rs(i) - rs(i) = c(i)*rs(i) - h(i,i) = c(i)*h(i,i)+s(i)*h(i+1,i) - + if (use_drot) then + do k=2,i + call drot(1,h(k-1,i),1,h(k,i),1,c(k-1),s(k-1)) + enddo + + rti = h(i,i) + rti1 = h(i+1,i) + call drotg(rti,rti1,c(i),s(i)) + call drot(1,h(i,i),1,h(i+1,i),1,c(i),s(i)) + h(i+1,i) = dzero + call drot(1,rs(i),1,rs(i+1),1,c(i),s(i)) + + else + do k=2,i + dt = h(k-1,i) + h(k-1,i) = c(k-1)*dt + s(k-1)*h(k,i) + h(k,i) = -s(k-1)*dt + c(k-1)*h(k,i) + enddo + gm = safe_dn2(h(i,i),h(i+1,i)) + if (debug) write(0,*) 'GM : ',gm + gm = max(gm,epstol) + + c(i) = h(i,i)/gm + s(i) = h(i+1,i)/gm + rs(i+1) = -s(i)*rs(i) + rs(i) = c(i)*rs(i) + h(i,i) = c(i)*h(i,i)+s(i)*h(i+1,i) + endif if (istop_ == 1) then ! ! build x and then compute the residual and its infinity norm diff --git a/krylov/psb_krylov_mod.f90 b/krylov/psb_krylov_mod.f90 index 4891b685..dde1cf72 100644 --- a/krylov/psb_krylov_mod.f90 +++ b/krylov/psb_krylov_mod.f90 @@ -127,7 +127,6 @@ Module psb_krylov_mod &itmax,iter,err,itrace,irst,istop) use psb_base_mod use psb_prec_mod -!!$ parameters Type(psb_dspmat_type), Intent(in) :: a Type(psb_desc_type), Intent(in) :: desc_a type(psb_dprec_type), intent(in) :: prec @@ -139,6 +138,21 @@ Module psb_krylov_mod Integer, Optional, Intent(out) :: iter Real(Kind(1.d0)), Optional, Intent(out) :: err end subroutine psb_dgmresr + Subroutine psb_zgmresr(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,irst,istop) + use psb_base_mod + use psb_prec_mod + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + type(psb_zprec_type), intent(in) :: prec + complex(Kind(1.d0)), Intent(in) :: b(:) + complex(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err + end subroutine psb_zgmresr end interface interface psb_cgs @@ -146,7 +160,6 @@ Module psb_krylov_mod &itmax,iter,err,itrace,istop) use psb_base_mod use psb_prec_mod -!!$ parameters type(psb_dspmat_type), intent(in) :: a type(psb_desc_type), intent(in) :: desc_a type(psb_dprec_type), intent(in) :: prec @@ -266,10 +279,10 @@ contains !!$ &itmax,iter,err,itrace,istop) case('BICGSTAB') call psb_bicgstab(a,prec,b,x,eps,desc_a,info,& - &itmax,iter,err,itrace,istop) -!!$ case('RGMRES') -!!$ call psb_rgmres(a,prec,b,x,eps,desc_a,info,& -!!$ &itmax,iter,err,itrace,irst,istop) + & itmax,iter,err,itrace,istop) + case('RGMRES') + call psb_rgmres(a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace,irst,istop) !!$ case('BICGSTABL') !!$ call psb_bicgstabl(a,prec,b,x,eps,desc_a,info,& !!$ &itmax,iter,err,itrace,irst,istop) diff --git a/krylov/psb_zcgs.f90 b/krylov/psb_zcgs.f90 index a0cfbfa2..c76b1e3f 100644 --- a/krylov/psb_zcgs.f90 +++ b/krylov/psb_zcgs.f90 @@ -95,7 +95,6 @@ Subroutine psb_zcgs(a,prec,b,x,eps,desc_a,info,& Real(Kind(1.d0)) :: rerr Integer :: litmax, naux, m, mglob, it, itrace_,int_err(5),& & np,me, n_row, n_col,istop_, err_act - Character ::diagl, diagu Logical, Parameter :: exchange=.True., noexchange=.False. Integer, Parameter :: irmax = 8 Integer :: itx, i, isvch, ictxt @@ -196,8 +195,6 @@ Subroutine psb_zcgs(a,prec,b,x,eps,desc_a,info,& ! Ensure global coherence for convergence checks. call psb_set_coher(ictxt,isvch) - diagl = 'u' - diagu = 'u' itx = 0 if (istop_ == 1) then diff --git a/krylov/psb_zcgstab.f90 b/krylov/psb_zcgstab.f90 index 07ae123d..b52b65ec 100644 --- a/krylov/psb_zcgstab.f90 +++ b/krylov/psb_zcgstab.f90 @@ -96,7 +96,6 @@ Subroutine psb_zcgstab(a,prec,b,x,eps,desc_a,info,& Real(Kind(1.d0)) :: rerr Integer :: litmax, naux, m, mglob, it,itrace_,& & np,me, n_row, n_col - Character ::diagl, diagu Logical, Parameter :: debug = .false. Logical, Parameter :: exchange=.True., noexchange=.False., debug1 = .False. Integer, Parameter :: irmax = 8 @@ -184,8 +183,6 @@ Subroutine psb_zcgstab(a,prec,b,x,eps,desc_a,info,& itrace_ = 0 End If - diagl = 'U' - diagu = 'U' ! Ensure global coherence for convergence checks. call psb_set_coher(ictxt,isvch) diff --git a/krylov/psb_zgmresr.f90 b/krylov/psb_zgmresr.f90 new file mode 100644 index 00000000..4c4295a5 --- /dev/null +++ b/krylov/psb_zgmresr.f90 @@ -0,0 +1,574 @@ +!!$ +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ +!!$ Contributions to this routine: +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the PSBLAS group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 C +!!$ C C +!!$ C C +!!$ C [5] G. Sleijpen, D. Fokkema C +!!$ C BICGSTAB(L) for linear equations involving unsymmetric C +!!$ C matrices with complex spectrum C +!!$ C Electronic Trans. on Numer. Analysis, Vol. 1, pp. 11-32, C +!!$ C Sep. 1993 C +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dgmresr.f90 +! +! Subroutine: psb_dgmres +! This subroutine implements the restarted GMRES method with right +! preconditioning. +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! irst - integer(optional). The restart value. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_zgmresr(a,prec,b,x,eps,desc_a,info,& + & itmax,iter,err,itrace,irst,istop) + use psb_base_mod + use psb_prec_mod + implicit none + +!!$ Parameters + Type(psb_zspmat_type), Intent(in) :: a + Type(psb_zprec_type), Intent(in) :: prec + Type(psb_desc_type), Intent(in) :: desc_a + complex(Kind(1.d0)), Intent(in) :: b(:) + complex(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ local data + complex(Kind(1.d0)), allocatable, target :: aux(:),w(:),w1(:), v(:,:) + complex(Kind(1.d0)), allocatable :: c(:),s(:), h(:,:), rs(:),rst(:),xt(:) + Real(Kind(1.d0)) :: rerr, gm,tmp + complex(kind(1.d0)) :: rti, rti1, scal + Integer ::litmax, liter, naux, m, mglob, it,k, itrace_,& + & np,me, n_row, n_col, nl, int_err(5) + Logical, Parameter :: exchange=.True., noexchange=.False. + Integer, Parameter :: irmax = 8 + Integer :: itx, i, isvch, ich, ictxt,istop_, err_act + Logical, Parameter :: debug = .false. + Real(Kind(1.d0)) :: rni, xni, bni, ani,bn2, dt + real(kind(1.d0)), external :: dznrm2 + character(len=20) :: name + + info = 0 + name = 'psb_dgmres' + call psb_erractionsave(err_act) + + If (debug) Write(0,*) 'entering psb_dgmres' + ictxt = psb_cd_get_context(desc_a) + Call psb_info(ictxt, me, np) + + If (debug) Write(0,*) 'psb_dgmres: from gridinfo',np,me + + mglob = psb_cd_get_global_rows(desc_a) + n_row = psb_cd_get_local_rows(desc_a) + n_col = psb_cd_get_local_cols(desc_a) + + if (present(istop)) then + istop_ = istop + else + istop_ = 1 + endif + ! + ! ISTOP_ = 1: Normwise backward error, infinity norm + ! ISTOP_ = 2: ||r||/||b||, 2-norm + ! + + if ((istop_ < 1 ).or.(istop_ > 2 ) ) then + write(0,*) 'psb_dgmres: invalid istop',istop_ + info=5001 + int_err(1)=istop_ + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + If (Present(itmax)) Then + litmax = itmax + Else + litmax = 1000 + Endif + + If (Present(itrace)) Then + itrace_ = itrace + Else + itrace_ = 0 + End If + + If (Present(irst)) Then + nl = irst + If (debug) Write(0,*) 'present: irst: ',irst,nl + Else + nl = 10 + If (debug) Write(0,*) 'not present: irst: ',irst,nl + Endif + if (nl <=0 ) then + write(0,*) 'psb_dgmres: invalid irst ',nl + info=5001 + int_err(1)=nl + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + call psb_chkvect(mglob,1,size(x,1),1,1,desc_a,info) + if(info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='psb_chkvect on X') + goto 9999 + end if + call psb_chkvect(mglob,1,size(b,1),1,1,desc_a,info) + if(info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='psb_chkvect on B') + goto 9999 + end if + + + naux=4*n_col + Allocate(aux(naux),h(nl+1,nl+1),& + &c(nl+1),s(nl+1),rs(nl+1), rst(nl+1),stat=info) + + if (info == 0) Call psb_geall(v,desc_a,info,n=nl+1) + if (info == 0) Call psb_geall(w,desc_a,info) + if (info == 0) Call psb_geall(w1,desc_a,info) + if (info == 0) Call psb_geall(xt,desc_a,info) + if (info == 0) Call psb_geasb(v,desc_a,info) + if (info == 0) Call psb_geasb(w,desc_a,info) + if (info == 0) Call psb_geasb(w1,desc_a,info) + if (info == 0) Call psb_geasb(xt,desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + if (debug) write(0,*) 'Size of V,W,W1 ',size(v),size(v,1),& + &size(w),size(w,1),size(w1),size(w1,1), size(v(:,1)) + + ! Ensure global coherence for convergence checks. + call psb_set_coher(ictxt,isvch) + + if (istop_ == 1) then + ani = psb_spnrmi(a,desc_a,info) + bni = psb_geamax(b,desc_a,info) + else if (istop_ == 2) then + bn2 = psb_genrm2(b,desc_a,info) + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + itx = 0 + restart: Do + + ! compute r0 = b-ax0 + ! check convergence + ! compute v1 = r0/||r0||_2 + + If (debug) Write(0,*) 'restart: ',itx,it + it = 0 + Call psb_geaxpby(zone,b,zzero,v(:,1),desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + Call psb_spmm(-zone,a,x,zone,v(:,1),desc_a,info,work=aux) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + rs(1) = psb_genrm2(v(:,1),desc_a,info) + rs(2:) = zzero + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + scal=done/rs(1) ! rs(1) MIGHT BE VERY SMALL - USE DSCAL TO DEAL WITH IT? + + If (debug) Write(0,*) 'on entry to amax: b: ',Size(b),rs(1),scal + + ! + ! check convergence + ! + if (istop_ == 1) then + rni = psb_geamax(v(:,1),desc_a,info) + xni = psb_geamax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + else if (istop_ == 2) then + rni = psb_genrm2(v(:,1),desc_a,info) + rerr = rni/bn2 + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + If (rerr<=eps) Then + Exit restart + End If + + If (itrace_ > 0) then + if ((mod(itx,itrace_)==0).and.(me == 0))& + & write(*,'(a,i4,3(2x,es10.4))') 'gmres(l): ',itx,rerr + end If + + v(:,1) = v(:,1) * scal + + If (itx.Ge.litmax) Exit restart + + ! + ! inner iterations + ! + + inner: Do i=1,nl + itx = itx + 1 + + call psb_precaply(prec,v(:,i),w1,desc_a,info) + Call psb_spmm(zone,a,w1,zzero,w,desc_a,info,work=aux) + ! + + do k = 1, i + h(k,i) = psb_gedot(v(:,k),w,desc_a,info) + call psb_geaxpby(-h(k,i),v(:,k),zone,w,desc_a,info) + end do + h(i+1,i) = psb_genrm2(w,desc_a,info) + scal=done/h(i+1,i) + call psb_geaxpby(scal,w,zzero,v(:,i+1),desc_a,info) + do k=2,i + call zrot(1,h(k-1,i),1,h(k,i),1,real(c(k-1)),s(k-1)) + enddo + + rti = h(i,i) + rti1 = h(i+1,i) + call zrotg(rti,rti1,tmp,s(i)) + c(i) = cmplx(tmp,zzero) + call zrot(1,h(i,i),1,h(i+1,i),1,real(c(i)),s(i)) + h(i+1,i) = zzero + call zrot(1,rs(i),1,rs(i+1),1,real(c(i)),s(i)) + + if (istop_ == 1) then + ! + ! build x and then compute the residual and its infinity norm + ! + rst = rs + xt = zzero + call ztrsm('l','u','n','n',i,1,zone,h,size(h,1),rst,size(rst,1)) + if (debug) write(0,*) 'Rebuild x-> RS:',rst(1:nl) + do k=1, i + call psb_geaxpby(rst(k),v(:,k),zone,xt,desc_a,info) + end do + call psb_precaply(prec,xt,desc_a,info) + call psb_geaxpby(zone,x,zone,xt,desc_a,info) + call psb_geaxpby(zone,b,zzero,w1,desc_a,info) + call psb_spmm(-zone,a,xt,zone,w1,desc_a,info,work=aux) + rni = psb_geamax(w1,desc_a,info) + xni = psb_geamax(xt,desc_a,info) + rerr = rni/(ani*xni+bni) + ! + + else if (istop_ == 2) then + ! + ! compute the residual 2-norm as byproduct of the solution + ! procedure of the least-squares problem + ! + rni = abs(rs(i+1)) + rerr = rni/bn2 + + endif + + if (rerr < eps ) then + + if (istop_ == 1) then + x = xt + else if (istop_ == 2) then + ! + ! build x + ! + call ztrsm('l','u','n','n',i,1,zone,h,size(h,1),rs,size(rs,1)) + if (debug) write(0,*) 'Rebuild x-> RS:',rs(21:nl) + w1 = zzero + do k=1, i + call psb_geaxpby(rs(k),v(:,k),zone,w1,desc_a,info) + end do + call psb_precaply(prec,w1,w,desc_a,info) + call psb_geaxpby(zone,w,zone,x,desc_a,info) + end if + + exit restart + + end if + + If (itrace_ > 0) then + if ((mod(itx,itrace_)==0).and.(me == 0))& + & write(*,'(a,i4,3(2x,es10.4))') 'gmres(l): ',itx,rerr + end If + + end Do inner + + if (istop_ == 1) then + x = xt + else if (istop_ == 2) then + ! + ! build x + ! + call ztrsm('l','u','n','n',nl,1,zone,h,size(h,1),rs,size(rs,1)) + if (debug) write(0,*) 'Rebuild x-> RS:',rs(21:nl) + w1 = zzero + do k=1, nl + call psb_geaxpby(rs(k),v(:,k),zone,w1,desc_a,info) + end do + call psb_precaply(prec,w1,w,desc_a,info) + call psb_geaxpby(zone,w,zone,x,desc_a,info) + end if + + End Do restart + If (itrace_ > 0) then + if (me == 0) write(*,'(a,i4,3(2x,es10.4))') 'gmres(l): ',itx,rerr + end If + + If (Present(err)) err=rerr + If (Present(iter)) iter = itx + If ((rerr>eps).and. (me == 0)) Then + Write(0,*) 'gmresr(l) failed to converge to ',eps,& + & ' in ',itx,' iterations ' + End If + + + Deallocate(aux,h,c,s,rs,rst, stat=info) + Call psb_gefree(v,desc_a,info) + Call psb_gefree(w,desc_a,info) + Call psb_gefree(w1,desc_a,info) + Call psb_gefree(xt,desc_a,info) + + ! restore external global coherence behaviour + call psb_restore_coher(ictxt,isvch) + + if (info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + + +contains + function safe_dn2(a,b) + real(kind(1.d0)), intent(in) :: a, b + real(kind(1.d0)) :: safe_dn2 + real(kind(1.d0)) :: t + + t = max(abs(a),abs(b)) + if (t==0.d0) then + safe_dn2 = 0.d0 + else + safe_dn2 = t * sqrt(abs(a/t)**2 + abs(b/t)**2) + endif + return + end function safe_dn2 + + +End Subroutine psb_zgmresr + + +subroutine zrot( n, cx, incx, cy, incy, c, s ) + ! + ! -- lapack auxiliary routine (version 3.0) -- + ! univ. of tennessee, univ. of california berkeley, nag ltd., + ! courant institute, argonne national lab, and rice university + ! october 31, 1992 + ! + ! .. scalar arguments .. + integer incx, incy, n + real(kind(1.d0)) c + complex(kind(1.d0)) s + ! .. + ! .. array arguments .. + complex(kind(1.d0)) cx( * ), cy( * ) + ! .. + ! + ! purpose + ! ======= + ! + ! zrot applies a plane rotation, where the cos (c) is real and the + ! sin (s) is complex, and the vectors cx and cy are complex. + ! + ! arguments + ! ========= + ! + ! n (input) integer + ! the number of elements in the vectors cx and cy. + ! + ! cx (input/output) complex*16 array, dimension (n) + ! on input, the vector x. + ! on output, cx is overwritten with c*x + s*y. + ! + ! incx (input) integer + ! the increment between successive values of cy. incx <> 0. + ! + ! cy (input/output) complex*16 array, dimension (n) + ! on input, the vector y. + ! on output, cy is overwritten with -conjg(s)*x + c*y. + ! + ! incy (input) integer + ! the increment between successive values of cy. incx <> 0. + ! + ! c (input) double precision + ! s (input) complex*16 + ! c and s define a rotation + ! [ c s ] + ! [ -conjg(s) c ] + ! where c*c + s*conjg(s) = 1.0. + ! + ! ===================================================================== + ! + ! .. local scalars .. + integer i, ix, iy + complex(kind(1.d0)) stemp + ! .. + ! .. intrinsic functions .. + intrinsic dconjg + ! .. + ! .. executable statements .. + ! + if( n.le.0 ) return + if( incx.eq.1 .and. incy.eq.1 ) then + ! + ! code for both increments equal to 1 + ! + do i = 1, n + stemp = c*cx(i) + s*cy(i) + cy(i) = c*cy(i) - dconjg(s)*cx(i) + cx(i) = stemp + end do + else + ! + ! code for unequal increments or equal increments not equal to 1 + ! + ix = 1 + iy = 1 + if( incx.lt.0 )ix = ( -n+1 )*incx + 1 + if( incy.lt.0 )iy = ( -n+1 )*incy + 1 + do i = 1, n + stemp = c*cx(ix) + s*cy(iy) + cy(iy) = c*cy(iy) - dconjg(s)*cx(ix) + cx(ix) = stemp + ix = ix + incx + iy = iy + incy + end do + end if + return + return +end subroutine zrot +! +! +subroutine zrotg(ca,cb,c,s) + complex(kind(1.d0)) ca,cb,s + real(kind(1.d0)) c + real(kind(1.d0)) norm,scale + complex(kind(1.d0)) alpha + ! + if (cdabs(ca) == 0.0d0) then + ! + c = 0.0d0 + s = (1.0d0,0.0d0) + ca = cb + return + end if + ! + + scale = cdabs(ca) + cdabs(cb) + norm = scale*dsqrt((cdabs(ca/dcmplx(scale,0.0d0)))**2 +& + & (cdabs(cb/dcmplx(scale,0.0d0)))**2) + alpha = ca /cdabs(ca) + c = cdabs(ca) / norm + s = alpha * dconjg(cb) / norm + ca = alpha * norm + ! + + return +end subroutine zrotg