Fixed documentation. Also going for GetRow in SYMBMM. NUMBMM to be completed.

19 years ago · e98bf6f89c
parent 4a4ad11ca4
commit e98bf6f89c
14 changed files with 6854 additions and 6067 deletions
--- a/docs/pdf/commrout.tex
+++ b/docs/pdf/commrout.tex
@ -10,7 +10,7 @@ routines not tied to a discretization space see~\ref{sec:toolsrout}.
 \subroutine{psb\_halo}{Halo Data Communication}    
-These subroutines restore a consistent status for the halo
+These subroutines gather the values of the halo
 elements, and  (optionally) scale the result:
 \[ x \leftarrow \alpha x \]
@ -80,8 +80,7 @@ An integer value that contains an error code.
 \subroutine{psb\_ovrl}{Overlap Update}    
-These subroutines restore a consistent status for the overlap
+These subroutines apply an overlap operator to the input vector:
 elements:
 \[ x \leftarrow Q x \]
 where:
@ -152,8 +151,8 @@ An integer value that contains an error code.
 \section*{Usage notes}
 \begin{enumerate}
-\item If there is no overlap in the data distribution, no operations
+\item If there is no overlap in the data distribution associated with
-are performed;
+  the descriptor, no operations are performed;
 \item The operator $P^{T}$ performs the reduction sum of overlap
 elements; it is a ``prolongation'' operator $P^T$ that
 replicates overlap elements, accounting for the physical replication
--- a/docs/pdf/datastruct.tex
+++ b/docs/pdf/datastruct.tex
@ -2,11 +2,11 @@
 \label{sec:datastruct}
 %\ifthenelse{\boolean{mtc}}{\minitoc}{}
-In this chapter are illustrated data structures used for definition of
+In this chapter we  illustrate the  data structures used for definition of
-routines interfaces. This include data structure for sparse matrix,
+routine interfaces. They  include data structures for sparse matrices,
-communication descriptor and preconditioner. These data structures are used for
+communication descriptors and preconditioners.%%  These data structures
-calling PSBLAS routines in Fortran~90 language and will be used to next
+%% are used for calling PSBLAS routines in Fortran~90 language and will
-chapters containing these callings. 
+%% be used to next chapters containing these callings.  
 All the data types and subroutine interfaces are defined in the module
 \verb|psb_sparse_mod|; this will have to be included by every user
@ -23,11 +23,10 @@ Every structure of this type is associated to a sparse matrix, it
 contains data about general matrix informations and elements to be
 exchanged among processes.  
-It is not necessary for the user to
+It is not necessary for the user to know the internal structure of
-know the internal structure of \verb|psb_desc_type|, it is set in
+\verb|psb_desc_type|, it is set in a transparent mode by the tools
-fully-transparent mode by PSBLAS-TOOLS routines when inserting a new
+routines of Sec.~\ref{sec:toolsrout} while creating a new sparse
-sparse matrix, however the definition of the descriptor is the
+matrix; nevertheless we include its description for the curious reader:  
 following.  
 \begin{description}
 \item[{\bf matrix\_data}] includes general information about matrix and
 process grid. More precisely:
@ -90,16 +89,16 @@ process then element $i$ contains local index correpondent to global variable $i
 else element $i$ contains -1 (NULL) value.\\
 Specified as: a pointer to an integer array of rank one.
 \end{description}
-FORTRAN95 interface for \verb|psb_desc_type| structures is therefore defined
+The Fortran95 definition  for \verb|psb_desc_type| structures is 
 as follows:
 \begin{figure}[h!]
  \begin{Sbox}
    \begin{minipage}[tl]{0.9\textwidth}
 \begin{verbatim} 
 type psb_desc_type 
-   integer, pointer :: matrix_data(:), halo_index(:)
+   integer, pointer :: matrix_data(:)=>null(), halo_index(:)=>null()
-   integer, pointer :: overlap_elem(:), overlap_index(:)
+   integer, pointer :: overlap_elem(:)=>null(), overlap_index(:)=>null()
-   integer, pointer :: loc_to_glob(:), glob_to_loc(:)
+   integer, pointer :: loc_to_glob(:)=>null(), glob_to_loc(:)=>null()
 end type psb_desc_type 
 \end{verbatim}
    \end{minipage}
@ -152,10 +151,9 @@ state, which can take the following values:
 \label{sec:spmat}
 The \hypertarget{spdata}{{\tt psb\_spmat\_type}} data structure
 contains all information about local portion of the sparse matrix and   
-its storage mode. Many of this fields are set in fully-transparent
+its storage mode. Most  of these fields are set  by the tools
-mode by PSBLAS-TOOLS routines when inserting a new sparse matrix, user
+routines when inserting a new sparse matrix; the user needs only
-must set only fields which describe matrix storage mode. \\
+choose, if he/she so wishes, a specific matrix storage mode. \\
 Fields contained in Sparse matrix structures are:
 \begin{description}
 \item[{\bf aspk}] Contains values of the local distributed sparse
 matrix.\\
@ -201,8 +199,9 @@ type psb_dspmat_type
   character   :: fida(5)
   character   :: descra(10)
   integer     :: infoa(psb_ifa_size_)
-   real(kind(1.d0)), pointer :: aspk(:)
+   real(kind(1.d0)), pointer :: aspk(:)=>null()
-   integer, pointer :: ia1(:), ia2(:), pr(:), pl(:)
+   integer, pointer :: ia1(:)=>null(), ia2(:)=>null()
   integer, pointer :: pr(:)=>null(), pl(:)=>null()
 end type psb_dspmat_type
 \end{verbatim}
    \end{minipage}
@ -273,12 +272,16 @@ values:
 \subsection{Preconditioner data structure}
 \label{sec:prec}
-PSBLAS-2.0 offers the possibility to use many different types of
+Our library  offers support for  many different types of
 preconditioning schemes. Besides the simple well known preconditioners
-like Diagonal Scaling or Block Jacobi (with ILU(0) incomplete
+like Diagonal Scaling or Block Jacobi with either  incomplete
-factorization) also more complex preconditioning methods are
+factorization ILU(0) or complete LU factorization.  We also provide an
-implemented like the Additive Schwarz and Two-Level ones. A
+experimental package of  complex 
-preconditioner is held in the \hypertarget{precdata}{{\tt
+preconditioning methods like the Additive Schwarz and Multilevel
 Additive Schwarz; these last preconditioners will be described in a
 separate document. 
 A preconditioner is held in the \hypertarget{precdata}{{\tt
    psb\_prec\_type}} data structure which depends on the
 \verb|psb_base_prec| reported in 
 figure~\ref{fig:prectype}. The \verb|psb_base_prec| 
--- a/docs/pdf/figures/points.pdf
+++ b/docs/pdf/figures/points.pdf
--- a/docs/pdf/intro.tex
+++ b/docs/pdf/intro.tex
@ -26,12 +26,12 @@ proposal for BLAS on dense matrices~\cite{BLAS1,BLAS2,BLAS3}.
 The applicability of sparse iterative solvers to many different areas
 causes some terminology problems because the same concept may be
 denoted through different names depending on the application area. The
-PSBLAS features presented in this section will be discussed mainly in terms of finite
+PSBLAS features presented in this document will be discussed referring
-difference discretizations of Partial Differential Equations (PDEs).
+to a   finite difference discretization of a Partial Differential
-However, the scope of the library is wider than that: for example, it
+Equation (PDE). However, the scope of the library is wider than
-can be applied to finite element discretizations of PDEs, and even to
+that: for example, it can be applied to finite element discretizations
-different classes of problems such as nonlinear optimization, for
+of PDEs, and even to different classes of problems such as nonlinear
-example in optimal control problems.
+optimization, for example in optimal control problems.
 The design of a solver for sparse linear systems is driven by many
 conflicting objectives, such as limiting occupation of storage
@ -75,6 +75,9 @@ Message Passing Interface code is encapsulated within the BLACS
 layer. However, in some cases, MPI routines are directly used either
 to improve efficiency or to implement communication patterns for which
 the BLACS package doesn't provide any method. 
 In any case we provide wrappers around the BLACS routines so that the
 user does not need to delve into their details (see Sec.~\ref{sec:toolsrout}).
 %% We assume that the user program has initialized a BLACS process grid
 %% with one column and as many rows as there are processes; the PSBLAS
 %% initialization routines will take the communication context for this
@ -86,6 +89,121 @@ the BLACS package doesn't provide any method.
 \caption{PSBLAS library components hierarchy.\label{fig:psblas}}
 \end{figure}
 The type of linear system matrices that we address  typically arise in the
 numerical solution of PDEs;  in such a context,
 it is necessary to pay special attention to the
 structure of the problem from which the application originates. 
 The nonzero pattern of a matrix arising from the
 discretization of a PDE is influenced by various factors, such as the
 shape of the  domain, the discretization strategy, and
 the equation/unknown ordering. The matrix itself can be interpreted as
 the  adjacency matrix of the graph associated with the discretization
 mesh. 
 The distribution of the coefficient matrix for the linear system is
 based on the ``owner computes'' rule: 
 the variable associated to each mesh point is assigned to a process
 that will  own the corresponding row in the coefficient matrix and
 will  carry out all related computations. This allocation strategy 
 is equivalent to a partition of the discretization mesh into {\em
 sub-domains}. 
 Our library  supports any distribution that keeps together 
 the coefficients of each matrix row; there are no other constraints on
 the variable assignment. 
 This choice is consistent with  data distributions commonly used in
 ScaLAPACK  such as  \verb|CYCLIC(N)| and \verb|BLOCK|, 
 as well as completely arbitrary assignments of
 equation indices to processes. In particular it is consistent with the
 usage of graph partitioning tools commonly available in the
 literature, e.g. METIS~\cite{METIS}.
 Dense vectors  conform  to sparse
 matrices, that is, the entries of a vector follow the same distribution
 of the matrix rows.  
 We assume that the sparse matrix is built in parallel, where each
 process generates its own portion. We never require that the entire
 matrix be available on a single node. However, it is possible
 to hold the entire matrix in one process and distribute it
 explicitly\footnote{In our prototype implementation  we provide 
 sample scatter/gather routines.}, even though  the resulting 
 bottleneck would make this option unattractive in most  cases. 
 \subsection{Basic Nomenclature}
 Our computational model implies that the data allocation on the
 parallel distributed memory machine is guided by the structure of the
 physical model, and specifically by the discretization mesh of the
 PDE. 
 Each point of the discretization mesh will have (at least) one
 associated equation/variable, and therefore one index. We say that
 point  $i$ {\em depends\/} on point $j$ if the  equation for a
 variable associated with $i$ contains a term in $j$,  or equivalently
 if $a_{ij} \ne0$.  
 After the partition of the discretization mesh into {\em sub-domains\/}
 assigned to the parallel processes,
 we classify the  points of a given sub-domain as follows.
 \begin{description}
 \item[Internal.] An internal point of
 a given domain {\em depends} only on  points of the
 same domain. 
 If all points of a domain are assigned to one
 process, then a computational step (e.g., a
 matrix-vector product) of the 
 equations associated with the internal points  requires no data
 items from other domains and no communications.
 \item[Boundary.] A point of
 a given domain is a boundary point if it {\em depends} on  points
 belonging to other domains.
 \item[Halo.] A halo point for a given domain is a point belonging to
 another domain such that there is a boundary point which {\em depends\/}
 on it. Whenever performing a computational step, such as a
 matrix-vector product, the values associated with halo points are
 requested from other domains. A boundary point of a given 
 domain is a halo point for (at least) another domain; therefore
 the cardinality of the boundary points set denotes the amount of data
 sent to other domains. 
 \item[Overlap.] An overlap point is a boundary point assigned to
 multiple domains. Any operation that involves an overlap point
 has to be replicated for each assignment. 
 \end{description}
 Overlap points do not usually exist in the basic data
 distribution, but they are a feature of Domain Decomposition
 Schwarz preconditioners which we are in the process of including in
 our distribution~\cite{PARA04,APNUM06}. 
 We denote the sets of  internal, boundary and halo points for a given
 subdomain  by $\cal I$, $\cal B$ and $\cal H$.
 Each subdomain is assigned to one process; each process usually
 owns one subdomain, although the user may choose to assign more than
 one subdomain to a process.  If each process $i$ owns one
 subdomain, the number of rows in the local sparse matrix is
 $|{\cal I}_i| + |{\cal B}_i|$, and the number of local columns
 (i.e. those for which there exists at least one non-zero entry in the
 local rows)  is $|{\cal I}_i| + |{\cal B}_i| +|{\cal H}_i|$.
 \begin{figure}[h] 
 \begin{center}
 \leavevmode
 \rotatebox{-90}{\includegraphics[scale=0.45]{figures/points}}
 \end{center}
 \caption{Point classification.\label{fig:points}}
 \end{figure}
 This classification of mesh points guides the naming scheme that we
 adopted in the library internals and in the data structures. We
 explicitly note that ``Halo'' points are also often called ``ghost''
 points in the literature. 
 \subsection{Library contents}
 The PSBLAS library consists of various classes of subroutines:
 \begin{description}
 \item[Computational routines] comprising:
@ -231,8 +349,8 @@ multiple time steps, the following structure may be more appropriate:
 \item Call the iterative method of choice, e.g. \verb|psb_bicgstab|
 \end{enumerate}
 \end{enumerate}
-The insertion routines will be called as many times as needed; it is
+The insertion routines will be called as many times as needed; 
-clear that they only need be called on the data that is actually
+they only need to  be called on the data that is actually
 allocated to the current process, i.e. each process generates its own
 data. 
--- a/docs/pdf/precs.tex
+++ b/docs/pdf/precs.tex
@ -9,8 +9,8 @@ many parameters that is possible to adjust to fit the user's needs:
 \begin{itemize}
 \item Diagonal Scaling
 \item Block Jacobi with ILU(0) factorization
-\item Additive Schwarz with the Restricted Additive Schwarz and
+%% \item Additive Schwarz with the Restricted Additive Schwarz and
-  Additive Schwarz with Harmonic extensions;
+%%   Additive Schwarz with Harmonic extensions;
 \end{itemize}
 The PSBLAS library is incorporating a package of two-level Additive
  Schwarz preconditioners called MD2P4; this is actually a family of 
@ -66,21 +66,21 @@ $ptype$ string as follows\footnote{The string is case-insensitive}:
  block-diagonal of matrix $A$, where block boundaries are determined
  by the data allocation boundaries for each process; requires no
  communication. Only $ILU(0)$ is currently implemented. 
-\item[AS] Additive Schwarz preconditioner (see~\cite{PARA04}); in this
+%% \item[AS] Additive Schwarz preconditioner (see~\cite{PARA04}); in this
-  case the user may specify additional flags through the integer
+%%   case the user may specify additional flags through the integer
-  vector \verb|ir| as follows:
+%%   vector \verb|ir| as follows:
-\begin{description}
+%% \begin{description}
-\item[$iv(1)$] Number of overlap levels, an integer $novr>=0$, default
+%% \item[$iv(1)$] Number of overlap levels, an integer $novr>=0$, default
-  $novr=1$.  
+%%   $novr=1$.  
-\item[$iv(2)$] Restriction operator, legal values: \verb|psb_halo_|,
+%% \item[$iv(2)$] Restriction operator, legal values: \verb|psb_halo_|,
-  \verb|psb_none_|; default: \verb|psb_halo_|
+%%   \verb|psb_none_|; default: \verb|psb_halo_|
-\item[$iv(3)$] Prolongation operator, legal values: \verb|psb_none_|,
+%% \item[$iv(3)$] Prolongation operator, legal values: \verb|psb_none_|,
-  \verb|psb_sum_|, \verb|psb_avg_|, default: \verb|psb_none_|
+%%   \verb|psb_sum_|, \verb|psb_avg_|, default: \verb|psb_none_|
-\item[$iv(4)$] Factorization type, legal values: \verb|f_ilu_n_|,
+%% \item[$iv(4)$] Factorization type, legal values: \verb|f_ilu_n_|,
-  \verb|f_slu_|, \verb|f_umf_|, default: \verb|f_ilu_n_|.
+%%   \verb|f_slu_|, \verb|f_umf_|, default: \verb|f_ilu_n_|.
-\end{description}
+%% \end{description}
-Note that the default corresponds to a Restricted Additive Schwarz
+%% Note that the default corresponds to a Restricted Additive Schwarz
-preconditioner with $ILU(0)$ and 1 level of overlap.
+%% preconditioner with $ILU(0)$ and 1 level of overlap.
 %% \item[2L] Second level of a multilevel preconditioner, see below
 %% \end{description}
 %% If a multilevel preconditioner is desired, the user should call
--- a/docs/pdf/title.tex
+++ b/docs/pdf/title.tex
@ -5,7 +5,7 @@
 \ifx\pdfoutput\undefined % We're not running pdftex
 \else
-\pdfbookmark{PSBLAS-v2.0 User's Guide}{title}
+\pdfbookmark{PSBLAS-v2.0.1 User's Guide}{title}
 \fi
 \newlength{\centeroffset}
 \setlength{\centeroffset}{-0.5\oddsidemargin}
@ -15,7 +15,7 @@
 \vspace*{\stretch{1}}
 \noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth}
 \flushright
-{\Huge\bfseries PSBLAS-2.0 User's guide
+{\Huge\bfseries PSBLAS-2.0.1 User's guide
 }
 \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex]
 \hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library}
--- a/docs/pdf/toolsrout.tex
+++ b/docs/pdf/toolsrout.tex
@ -412,7 +412,8 @@ Specified as: an integer variable.
 Scope:{\bf local}.\\
 Type:{\bf required}.\\
 Specified as: a structured data of type \descdata.
-\item[nnz] the number of nonzeroes in the local part of the assembled matrix.\\
+\item[nnz] An estimate of the number of nonzeroes in the local
  part of the assembled matrix.\\ 
 Scope: {\bf global}.\\
 Type: {\bf optional}.\\
 Specified as: an integer value. 
@ -437,7 +438,7 @@ Specified as: an integer variable.
 \item Providing a good estimate for the number of nonzeroes $nnz$ in
  the assembled matrix may substantially improve performance in the
  matrix build phase, as it will reduce or eliminate the need for
-  multiple data allocations. 
+  (potentially multiple) data reallocations. 
 \end{enumerate}
--- a/docs/pdf/userguide.tex
+++ b/docs/pdf/userguide.tex
@ -24,7 +24,7 @@
 \relax
 \pdfcompresslevel=0             %-- 0 = none, 9 = best
 \pdfinfo{                       %-- Info dictionary of PDF output  /Author (Alfredo Buttari)
-  /Title (Parallel Sparse BLAS V. 2.0)
+  /Title (Parallel Sparse BLAS V. 2.0.1)
  /Subject (Parallel Sparse Basic Linear Algebra Subroutines)
  /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners)
  /Creator (pdfLaTeX)
@ -201,6 +201,13 @@ in G.~Joubert, A.~Murli, F.~Peters, M.~Vanneschi, editors,
 Parallel Computing - Advances \& Current Issues,
 pp.~441--448, Imperial College Press, 2002. 
 %
 \bibitem{METIS}
 Karypis, G. and Kumar, V.,
 {\em {METIS}: Unstructured Graph Partitioning and Sparse Matrix
  Ordering System}.
 Minneapolis, MN 55455: University of Minnesota, Department of
  Computer Science, 1995. 
 Internet Address: {\verb|http://www.cs.umn.edu/~karypis|}.
 \bibitem{machiels}
 {Machiels, L. and Deville, M.}
 {\em Fortran 90: An entry to object-oriented programming for the solution
--- a/docs/userguide.pdf
+++ b/docs/userguide.pdf
--- a/src/serial/psb_dnumbmm.f90
+++ b/src/serial/psb_dnumbmm.f90
@ -40,12 +40,48 @@ subroutine psb_dnumbmm(a,b,c)
  real(kind(1.d0)), allocatable :: temp(:)
  integer                       :: info
-  allocate(temp(max(a%m,a%k,b%m,b%k)),stat=info)
+  interface psb_sp_getrow
    subroutine psb_dspgetrow(irw,a,nz,ia,ja,val,info,iren,lrw)
      use psb_spmat_type
      type(psb_dspmat_type), intent(in) :: a
      integer, intent(in)       :: irw
      integer, intent(out)      :: nz
      integer, intent(inout)    :: ia(:), ja(:)
      real(kind(1.d0)),  intent(inout)    :: val(:)
      integer, intent(in), target, optional :: iren(:)
      integer, intent(in), optional :: lrw
      integer, intent(out)  :: info
    end subroutine psb_dspgetrow
  end interface
  allocate(temp(max(a%m,a%k,b%m,b%k)),stat=info)
  if (info /= 0) then
    return
  endif
  call psb_realloc(size(c%ia1),c%aspk,info)
  if (.true.) then 
    call numbmm(a%m,a%k,b%k,a%ia2,a%ia1,0,a%aspk,&
         & b%ia2,b%ia1,0,b%aspk,&
         & c%ia2,c%ia1,0,c%aspk,temp)
  else
    call inner_numbmm(a,b,c,temp,info)
  end if
  deallocate(temp) 
  return
 contains 
  ! Placeholder for a row-by-row numeric sparse product working on the
  ! matrices a, b, c with the real work array temp.
  ! NOTE(review): this routine currently contains declarations only and no
  ! executable statements; in particular info is never assigned.  The only
  ! call site visible in this hunk sits in the else branch of
  ! "if (.true.)", so it is not reached at present.
  subroutine inner_numbmm(a,b,c,temp,info)
    type(psb_dspmat_type) :: a,b,c
    integer               :: info
    real(kind(1.d0))      :: temp(:)
    ! Row buffers (row index, column index, value) for A and B; declared
    ! but not yet allocated or used.
    integer, allocatable  :: iarw(:), iacl(:),ibrw(:),ibcl(:)
    real(kind(1.d0)), allocatable :: aval(:),bval(:)
    integer  :: maxlmn,i,j,m,n,k,l,istart,length,nazr,nbzr,jj,ii,minlm,minmn
  end subroutine inner_numbmm
 end subroutine psb_dnumbmm
--- a/src/serial/psb_dspgetrow.f90
+++ b/src/serial/psb_dspgetrow.f90
@ -68,7 +68,7 @@ subroutine psb_dspgetrow(irw,a,nz,ia,ja,val,info,iren,lrw)
    end subroutine psb_dspgtblk
  end interface
-  integer               :: lrw_, ierr(2), err_act
+  integer               :: lrw_, ierr(5), err_act
  type(psb_dspmat_type) :: b
  integer, pointer      :: iren_(:)
  character(len=20)     :: name, ch_err
--- a/src/serial/psb_dsymbmm.f90
+++ b/src/serial/psb_dsymbmm.f90
@ -49,16 +49,37 @@ subroutine psb_dsymbmm(a,b,c)
    end subroutine symbmm
  end interface
  interface psb_sp_getrow
    subroutine psb_dspgetrow(irw,a,nz,ia,ja,val,info,iren,lrw)
      use psb_spmat_type
      type(psb_dspmat_type), intent(in) :: a
      integer, intent(in)       :: irw
      integer, intent(out)      :: nz
      integer, intent(inout)    :: ia(:), ja(:)
      real(kind(1.d0)),  intent(inout)    :: val(:)
      integer, intent(in), target, optional :: iren(:)
      integer, intent(in), optional :: lrw
      integer, intent(out)  :: info
    end subroutine psb_dspgetrow
  end interface
  if (b%m /= a%k) then 
    write(0,*) 'Mismatch in SYMBMM: ',a%m,a%k,b%m,b%k
  endif
  allocate(itemp(max(a%m,a%k,b%m,b%k)),stat=info)    
  if (info /= 0) then 
    return
  endif
  nze = max(a%m+1,2*a%m)
  call psb_sp_reall(c,nze,info)
 !!$  write(0,*) 'SYMBMM90 ',size(c%pl),size(c%pr)
  if (.false.) then 
    call symbmm(a%m,a%k,b%k,a%ia2,a%ia1,0,&
         & b%ia2,b%ia1,0,&
         & c%ia2,c%ia1,0,itemp)
  else 
    call inner_symbmm(a,b,c,itemp,info)
  endif
  c%pl(1) = 0
  c%pr(1) = 0
  c%m=a%m
@ -67,4 +88,85 @@ subroutine psb_dsymbmm(a,b,c)
  c%descra='GUN'
  deallocate(itemp) 
  return
 contains
  ! Symbolic phase of the sparse product C = A*B, real version.
  !
  ! For every row i of A the routine fetches the row through
  ! psb_sp_getrow, then fetches each row of B selected by the column
  ! indices of A's row, and records the distinct column indices met.
  ! The result is stored as row pointers in c%ia2 and column indices in
  ! c%ia1; no numerical values of C are computed here.
  !
  ! Arguments:
  !   a, b   input matrices; only a%m, a%k, b%k and the rows returned by
  !          psb_sp_getrow are used.
  !   c      output pattern; c%ia2 is grown to a%m+1 entries if shorter,
  !          c%ia1 is grown on demand while rows are produced.
  !   index  integer work array, at least max(a%m,a%k,b%k) entries.  It is
  !          zeroed on entry and used as an in-array linked list of the
  !          columns found in the current row (0 = not seen, -1 = end of
  !          list).
  !   info   0 on success; otherwise the nonzero status of the first
  !          allocate / psb_realloc / psb_sp_getrow that failed, in which
  !          case the routine returns immediately and c is incomplete.
  !
  ! nze is not declared here: it is the variable of the enclosing
  ! subroutine, reached by host association.
  subroutine inner_symbmm(a,b,c,index,info)
    type(psb_dspmat_type), intent(in)    :: a,b
    type(psb_dspmat_type), intent(inout) :: c
    integer, intent(inout) :: index(:)
    integer, intent(out)   :: info
    integer, allocatable  :: iarw(:), iacl(:),ibrw(:),ibcl(:)
    real(kind(1.d0)), allocatable :: aval(:),bval(:)
    integer  :: maxlmn,i,j,m,n,k,l,istart,length,nazr,nbzr,jj

    n = a%m
    m = a%k
    l = b%k
    maxlmn = max(l,m,n)

    ! Row buffers: one full row of A or B fits in maxlmn entries.
    allocate(iarw(maxlmn),iacl(maxlmn),ibrw(maxlmn),ibcl(maxlmn),&
         & aval(maxlmn),bval(maxlmn), stat=info)
    if (info /= 0) then
      return
    endif

    if (size(c%ia2) < n+1) then
      call psb_realloc(n+1,c%ia2,info)
      ! Do not write into c%ia2 if it could not be enlarged.
      if (info /= 0) return
    endif

    index(1:maxlmn) = 0
    c%ia2(1) = 1

    main: do i=1,n
      ! Start an empty list for row i of C.
      istart = -1
      length = 0

      call psb_sp_getrow(i,a,nazr,iarw,iacl,aval,info)
      if (info /= 0) return

      do jj=1, nazr
        j = iacl(jj)
        if ((j<1).or.(j>m)) then
          write(0,*) ' SymbMM: Problem with A ',i,jj,j,m
          ! j is not a valid row of B: report it and skip the entry
          ! instead of asking for a non-existent row.
          cycle
        endif

        call psb_sp_getrow(j,b,nbzr,ibrw,ibcl,bval,info)
        if (info /= 0) return

        do k=1,nbzr
          if ((ibcl(k)<1).or.(ibcl(k)>maxlmn)) then
            write(0,*) 'Problem in SYMBMM 1:',j,k,ibcl(k),maxlmn
          else
            ! First time this column shows up in row i: push it on the
            ! front of the list.
            if (index(ibcl(k)) == 0) then
              index(ibcl(k)) = istart
              istart = ibcl(k)
              length = length+1
            endif
          endif
        end do
      end do

      c%ia2(i+1) = c%ia2(i)+length

      if (c%ia2(i+1) > size(c%ia1)) then
        ! Grow c%ia1: extrapolate the fill seen so far to all n rows, but
        ! never ask for less than what row i needs.
        if (n > (2*i)) then
          nze = max(c%ia2(i+1), c%ia2(i)*((n+i-1)/i))
        else
          nze = max(c%ia2(i+1), nint((dble(c%ia2(i))*(dble(n)/i)))   )
        endif
        call psb_realloc(nze,c%ia1,info)
        ! Do not store column indices past the end of c%ia1.
        if (info /= 0) return
      end if

      ! Unlink the list into c%ia1, clearing the markers so that index is
      ! all zero again for the next row.
      do j= c%ia2(i),c%ia2(i+1)-1
        c%ia1(j) = istart
        istart = index(istart)
        index(c%ia1(j)) = 0
      end do

      ! NOTE(review): isr is defined elsewhere; presumably it sorts the
      ! length column indices of this row in place -- confirm.
      call isr(length,c%ia1(c%ia2(i)))
      index(i) = 0
    end do main
  end subroutine inner_symbmm
 end subroutine psb_dsymbmm
--- a/src/serial/psb_zspgetrow.f90
+++ b/src/serial/psb_zspgetrow.f90
@ -68,7 +68,7 @@ subroutine psb_zspgetrow(irw,a,nz,ia,ja,val,info,iren,lrw)
    end subroutine psb_zspgtblk
  end interface
-  integer               :: lrw_, ierr(2), err_act
+  integer               :: lrw_, ierr(5), err_act
  type(psb_zspmat_type) :: b
  integer, pointer      :: iren_(:)
  character(len=20)     :: name, ch_err
--- a/src/serial/psb_zsymbmm.f90
+++ b/src/serial/psb_zsymbmm.f90
@ -49,16 +49,37 @@ subroutine psb_zsymbmm(a,b,c)
    end subroutine symbmm
  end interface
  interface psb_sp_getrow
    subroutine psb_zspgetrow(irw,a,nz,ia,ja,val,info,iren,lrw)
      use psb_spmat_type
      type(psb_zspmat_type), intent(in) :: a
      integer, intent(in)       :: irw
      integer, intent(out)      :: nz
      integer, intent(inout)    :: ia(:), ja(:)
      complex(kind(1.d0)),  intent(inout)    :: val(:)
      integer, intent(in), target, optional :: iren(:)
      integer, intent(in), optional :: lrw
      integer, intent(out)  :: info
    end subroutine psb_zspgetrow
  end interface
  if (b%m /= a%k) then 
    write(0,*) 'Mismatch in SYMBMM: ',a%m,a%k,b%m,b%k
  endif
  allocate(itemp(max(a%m,a%k,b%m,b%k)),stat=info)    
  if (info /= 0) then 
    return
  endif
  nze = max(a%m+1,2*a%m)
  call psb_sp_reall(c,nze,info)
 !!$  write(0,*) 'SYMBMM90 ',size(c%pl),size(c%pr)
  if (.false.) then 
    call symbmm(a%m,a%k,b%k,a%ia2,a%ia1,0,&
         & b%ia2,b%ia1,0,&
         & c%ia2,c%ia1,0,itemp)
  else 
    call inner_symbmm(a,b,c,itemp,info)
  endif
  c%pl(1) = 0
  c%pr(1) = 0
  c%m=a%m
@ -67,4 +88,82 @@ subroutine psb_zsymbmm(a,b,c)
  c%descra='GUN'
  deallocate(itemp) 
  return
 contains
  ! Symbolic phase of the sparse product C = A*B, complex version.
  !
  ! For every row i of A the routine fetches the row through
  ! psb_sp_getrow, then fetches each row of B selected by the column
  ! indices of A's row, and records the distinct column indices met.
  ! The result is stored as row pointers in c%ia2 and column indices in
  ! c%ia1; no numerical values of C are computed here.
  !
  ! Arguments:
  !   a, b   input matrices; only a%m, a%k, b%k and the rows returned by
  !          psb_sp_getrow are used.
  !   c      output pattern; c%ia2 is grown to a%m+1 entries if shorter,
  !          c%ia1 is grown on demand while rows are produced.
  !   index  integer work array, at least max(a%m,a%k,b%k) entries.  It is
  !          zeroed on entry and used as an in-array linked list of the
  !          columns found in the current row (0 = not seen, -1 = end of
  !          list).
  !   info   0 on success; otherwise the nonzero status of the first
  !          allocate / psb_realloc / psb_sp_getrow that failed, in which
  !          case the routine returns immediately and c is incomplete.
  !
  ! nze is not declared here: it is the variable of the enclosing
  ! subroutine, reached by host association.
  subroutine inner_symbmm(a,b,c,index,info)
    type(psb_zspmat_type), intent(in)    :: a,b
    type(psb_zspmat_type), intent(inout) :: c
    integer, intent(inout) :: index(:)
    integer, intent(out)   :: info
    integer, allocatable  :: iarw(:), iacl(:),ibrw(:),ibcl(:)
    complex(kind(1.d0)), allocatable :: aval(:),bval(:)
    integer  :: maxlmn,i,j,m,n,k,l,istart,length,nazr,nbzr,jj

    n = a%m
    m = a%k
    l = b%k
    maxlmn = max(l,m,n)

    ! Row buffers: one full row of A or B fits in maxlmn entries.
    allocate(iarw(maxlmn),iacl(maxlmn),ibrw(maxlmn),ibcl(maxlmn),&
         & aval(maxlmn),bval(maxlmn), stat=info)
    if (info /= 0) then
      return
    endif

    if (size(c%ia2) < n+1) then
      call psb_realloc(n+1,c%ia2,info)
      ! Do not write into c%ia2 if it could not be enlarged.
      if (info /= 0) return
    endif

    index(1:maxlmn) = 0
    c%ia2(1) = 1

    main: do i=1,n
      ! Start an empty list for row i of C.
      istart = -1
      length = 0

      call psb_sp_getrow(i,a,nazr,iarw,iacl,aval,info)
      if (info /= 0) return

      do jj=1, nazr
        j = iacl(jj)
        if ((j<1).or.(j>m)) then
          write(0,*) ' SymbMM: Problem with A ',i,jj,j,m
          ! j is not a valid row of B: report it and skip the entry
          ! instead of asking for a non-existent row.
          cycle
        endif

        call psb_sp_getrow(j,b,nbzr,ibrw,ibcl,bval,info)
        if (info /= 0) return

        do k=1,nbzr
          if ((ibcl(k)<1).or.(ibcl(k)>maxlmn)) then
            write(0,*) 'Problem in SYMBMM 1:',j,k,ibcl(k),maxlmn
          else
            ! First time this column shows up in row i: push it on the
            ! front of the list.
            if (index(ibcl(k)) == 0) then
              index(ibcl(k)) = istart
              istart = ibcl(k)
              length = length+1
            endif
          endif
        end do
      end do

      c%ia2(i+1) = c%ia2(i)+length

      if (c%ia2(i+1) > size(c%ia1)) then
        ! Grow c%ia1: extrapolate the fill seen so far to all n rows, but
        ! never ask for less than what row i needs.
        if (n > (2*i)) then
          nze = max(c%ia2(i+1), c%ia2(i)*((n+i-1)/i))
        else
          nze = max(c%ia2(i+1), nint((dble(c%ia2(i))*(dble(n)/i)))   )
        endif
        call psb_realloc(nze,c%ia1,info)
        ! Do not store column indices past the end of c%ia1.
        if (info /= 0) return
      end if

      ! Unlink the list into c%ia1, clearing the markers so that index is
      ! all zero again for the next row.
      do j= c%ia2(i),c%ia2(i+1)-1
        c%ia1(j) = istart
        istart = index(istart)
        index(c%ia1(j)) = 0
      end do

      ! NOTE(review): isr is defined elsewhere; presumably it sorts the
      ! length column indices of this row in place -- confirm.
      call isr(length,c%ia1(c%ia2(i)))
      index(i) = 0
    end do main
  end subroutine inner_symbmm
 end subroutine psb_zsymbmm