diff --git a/Changelog b/Changelog new file mode 100644 index 00000000..4baabbaa --- /dev/null +++ b/Changelog @@ -0,0 +1,114 @@ +Changelog. A lot less detailed than usual, at least for past +history. + +2005/05/04: Now enabled SuperLU complete factorization as basis for AS. + +2005/04/29: First version with decoupled 2-level. + +2005/04/06: Started work on decoupling the preconditioner aggregation + for 2-level from the main factorization. + +2005/03/30: First version of new DSC/SP allocate/insert/assembly + routines. +2005/03/17: First version of RGMRES. To be refined. + +2005/03/08: dSwapTran aligned with dSwapData. Taken out SwapOverlap. + also moved onto iSwapX. + +2005/03/07: dSwapData rewritten to achieve: 1. better performance; + 2. more flexible functionality. It is now possible to + avoid SwapOvrlap entirely, relying on just SwapData. + SwapTran is still alive, since it reads the descriptors in + "transpose" mode. Also, added work areas to preconditioner + routine, to avoid excessive allocation in the halo/overlap + exchange. + +2005/03/04: Had to put in a workaround for a gfortran bug: + tolower/toupper cannot be functions. + +2005/02/09: Explicit storage choice for the smoother. This seems + to be changing a little bit the actual preconditioner. + To be evaluated further. + +2005/02/08: Renamed F90_PSPREC to PSB_PRCAPLY and Preconditioner to + PSB_PRCBLD. Changed the way PRCAPLY decides what to do. + Still needs a PSB_PRCSET to be called before PRCBLD. + +2005/01/28: Started moving functionalities to a SERIAL F90 layer. Also + defined a new COMM layer, to enable implementing SPMM + directly in F90. + +2005/01/20: Finally taken out a direct call to the F77 DCSDP from + SPASB. + +2005/01/18: After much work, we now have 2-level Additive Schwarz + prototype implemented and working. We now start a major + code cleanup that will take some time. Mainly we want to + move a lot of the serial F77 functionality into a new F95 + serial layer, to simplify the parallel F95 code. + +2004/11/25: Following the introduction of Additive Shwarz and + variants, we have now renamed DECOMP_ and friends as + DESC_; this makes things more readable. Sooner or later + we're going to merge this into mainline, but this version + is still very much in a state of flux. + +2004/07/18: For use with gfortran we need to declare the pointer + components with NULL() initialization. This rules out + VAST and PGI. + +2004/07/15: First development version with gfortran from the current + snapshot of gcc 3.5.0. + It is now possible in PSI_dSwapData to opt for + SEND|RECEIVE|SYNC data exchange; plan is to extend to all + data exchange functions, plus making it available as an + option from the F90 level. + +2004/07/06: Merged in a lot of stuff coming mainly from the ASM + development; full merge will have to wait a little more. + Among other things: + use of psimod + new choice parms for overlap + new data exchange for swapdata, to be extended. + multicolumn CSMM. + use psrealloc + new format for marking a matrix as suitable for update. + + +2003/12/09: Changed DSALLOC and DSASB to make sure whenever a dense + matrix is allocated it is also zeroed out. + +2003/10/13: Added call to BLACS_SET in the solvers to ensure global + heterogeneous coherence in the combine operations. + +2003/09/30: Added LOC_TO_GLOB and GLOB_TO_LOC support routines. + +2003/09/30: Changed interface for smart update capabilities: choose + with optional parameters in ASB routines. + +2003/09/16: IFC 7.0 had a strange behaviour in the test programs: + sometimes the declaration of PARTS dummy argument with an + INTERFACE would not work, requiring an EXTERNAL + declaration. The proper INTERFACE works now with 7.1. + +2003/03/10: Halo data exchange in F90_PSHALO can now be applied to + integer data; create appropriate support routines. + +2002/12/05: Initial version of Fileread sample programs. + +2002/11/19: Fixes for JAD preconditioner. + +2002/11/19: Methods for patterns: create a descriptor without a + matrix. + +2001/11/16: Reviewed the interfaces: in the tools section we really + need the POINTER attribute for dense vectors, but not in + the computational routines; taking it out allows more + flexibility. + +2001/09/16: Smart update capabilities. + +2001/03/16: Renumbering routines. + +2001/01/14: Added extensions to compute multiple DOTs and AMAXs at once; + diff --git a/Make.inc b/Make.inc new file mode 100644 index 00000000..ae5835bd --- /dev/null +++ b/Make.inc @@ -0,0 +1,79 @@ +.mod=.mod +.SUFFIXES: .f90 $(.mod) + + +####################### Section 1 ####################### +# Define your compilers and compiler flags here # +########################################################## +F90=ifort +FC=ifort +CC=icc +F77=$(FC) +F90COPT=-g -CB -no_cpprt +FCOPT=-g -CB -no_cpprt +CCOPT=-g -CB -no_cpprt + +####################### Section 2 ####################### +# Define your linker and linker flags here # +########################################################## +F90LINK=/usr/local/mpich-intel/bin/mpif90 -g -CB -no_cpprt +FLINK=mpif77 -g -CB -no_cpprt +MPF90=/usr/local/mpich-intel/bin/mpif90 -g -CB -no_cpprt +MPCC=/usr/local/mpich-intel/bin/mpicc -g -CB -no_cpprt + +####################### Section 3 ####################### +# Specify paths to libraries # +########################################################## +BLAS=-lblas-intel -L$(HOME)/NUMERICAL/LIB +BLACS=-lmpiblacs-intel -L$(HOME)/NUMERICAL/LIB + + +####################### Section 4 ####################### +# Other useful tools&defines # +########################################################## +CDEFINES=-DAdd_ +AR=ar -cur +RANLIB=ranlib + + + +####################### Section 5 ####################### +# Do not edit this # +########################################################## +LIBDIR = lib +PSBLASLIB = libpsblas.a +TOOLSLIB = libpsbtools.a +COMMLIB = libpsbcomm.a +METHDLIB = libpsbmethd.a +PRECLIB = libpsbprec.a + +TYPEMODS = psb_spmat_type$(.mod) psb_desc_type$(.mod) psb_prec_type$(.mod) psb_realloc_mod$(.mod) +CONSTMODS = psb_tools_const$(.mod) +BLASMODS = $(TYPEMODS) psb_psblas_mod$(.mod) psb_comm_mod$(.mod) +METHDMODS = psb_methd_mod$(.mod) +TOOLSMODS = $(CONSTMODS) psi_mod$(.mod) psb_tools_mod$(.mod) psb_serial_mod$(.mod) +PRECMODS = psb_prec_mod$(.mod) +ERRORMODS = psb_error_mod$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) $(ERRORMODS) string$(.mod) + +MODS=$(LIBDIR)/psb_tools_const$(.mod) $(LIBDIR)/psb_spmat_type$(.mod) $(LIBDIR)/psb_realloc_mod$(.mod) \ + $(LIBDIR)/psb_desc_type$(.mod) $(LIBDIR)/psb_prec_type$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/psb_serial_mod$(.mod) $(LIBDIR)/psb_comm_mod$(.mod) $(LIBDIR)/psb_error_mod$(.mod) + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) -I $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) -I $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) -I $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) -I $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) -I $(INCDIRS) -c $< + + + + diff --git a/Make.inc.g95 b/Make.inc.g95 new file mode 100644 index 00000000..417f962b --- /dev/null +++ b/Make.inc.g95 @@ -0,0 +1,54 @@ +# Using GNU gfortran (from GCC 3.5.0) +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=/usr/local/g95-install/bin/g95 +FC=/usr/local/g95-install/bin/g95 +F77=$(FC) +F90COPT=-O3 -ggdb -fbounds-check +FCOPT=-O3 -ggdb -fbounds-check +CC=gcc +CCOPT=-O3 -ggdb +F90LINK=/usr/local/mpich-g95/bin/mpif90 +FLINK=mpif77 +MPF90=$(F90LINK) +MPCC=/usr/local/mpich-g95/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacsg95 -L$(HOME)/LIB +# +CDEFINES=-DAddDouble_ +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) f90comm$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) f90serial$(.mod) string$(.mod) +PRECMODS = f90prec$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/f90serial$(.mod) $(LIBDIR)/f90comm$(.mod) $(LIBDIR)/string$(.mod) + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.gfortran b/Make.inc.gfortran new file mode 100644 index 00000000..9b313eb5 --- /dev/null +++ b/Make.inc.gfortran @@ -0,0 +1,62 @@ +# Using GNU gfortran (from GCC 3.5.0) +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=/usr/local/gfortran/bin/gfortran +FC=/usr/local/gfortran/bin/gfortran +F77=$(FC) +F90COPT=-O3 -ffast-math -march=pentium4 -msse2 -mfpmath=sse +FCOPT=-O3 -ffast-math -march=pentium4 -msse2 -mfpmath=sse +CC=/usr/local/gfortran/bin/gcc +CCOPT=-O3 -ffast-math -march=pentium4 -msse2 -mfpmath=sse +F90LINK=/usr/local/mpich-gfortran/bin/mpif90 +FLINK=mpif77 +MPF90=$(F90LINK) +MPCC=/usr/local/mpich-gfortran/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacs-gfortran -L$(HOME)/LIB +# +# Comment these, and uncomment SLUDEF below if you don't want SuperLU. +SLUDIR=/usr/local/SuperLU_3.0 +SLU=-lslu_lx_gfort -L$(SLUDIR) +SLUDEF=-DHave_SLU_ -I$(SLUDIR) +# SLUDEF= +CDEFINES=-DAdd_ $(SLUDEF) + +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) f90comm$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) f90serial$(.mod) string$(.mod) +PRECMODS = f90prec$(.mod) +ERRORMODS = errormod$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) $(ERRORMODS) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/f90serial$(.mod) $(LIBDIR)/f90comm$(.mod) $(LIBDIR)/string$(.mod)\ + $(LIBDIR)/errormod$(.mod) + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.ifc71 b/Make.inc.ifc71 new file mode 100644 index 00000000..73023219 --- /dev/null +++ b/Make.inc.ifc71 @@ -0,0 +1,54 @@ +# Using Intel Fortran compiler version 7.0 +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=${IFC7}/bin/ifc +FC=${IFC7}/bin/ifc +F77=$(FC) +F90COPT=-O3 +FCOPT=-O3 +CC=gcc +CCOPT=-O3 +F90LINK=/usr/local/mpich-ifc71/bin/mpif90 +FLINK=mpif77 +MPF90=/usr/local/mpich-ifc71/bin/mpif90 +MPCC=/usr/local/mpich-ifc71/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacsifc71 -L$(HOME)/LIB +SLU=-lslu_lx_ifc8 +# +CDEFINES=-DAdd_ +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = TYPESP$(.mod) TYPEDESC$(.mod) TYPEPREC$(.mod) REALLOC$(.mod) +CONSTMODS = TOOLS_CONST$(.mod) +BLASMODS = $(TYPEMODS) F90PSBLAS$(.mod) F90COMM$(.mod) +METHDMODS = F90METHD$(.mod) +TOOLSMODS = $(CONSTMODS) PSIMOD$(.mod) F90TOOLS$(.mod) F90SERIAL$(.mod) STRING$(.mod) +PRECMODS = F90PREC$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) F90SPARSE$(.mod) + +MODS=$(LIBDIR)/TOOLS_CONST$(.mod) $(LIBDIR)/TYPESP$(.mod) $(LIBDIR)/REALLOC$(.mod) \ + $(LIBDIR)/TYPEDESC$(.mod) $(LIBDIR)/parts.f90 $(LIBDIR)/STRING$(.mod) + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.ifc8 b/Make.inc.ifc8 new file mode 100644 index 00000000..857a66e7 --- /dev/null +++ b/Make.inc.ifc8 @@ -0,0 +1,63 @@ + +# Using Intel Fortran compiler version 8.0 +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=${IFC8}/bin/ifort +FC=${IFC8}/bin/ifort +F77=$(FC) +F90COPT=-O3 +FCOPT=-O3 +CC=gcc +CCOPT=-O3 -g +F90LINK=/usr/local/mpich-ifc80/bin/mpif90 +FLINK=mpif77 +MPF90=/usr/local/mpich-ifc80/bin/mpif90 +MPCC=/usr/local/mpich-ifc80/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacsifc80 -L$(HOME)/LIB +# Comment these, and uncomment SLUDEF below if you don't want SuperLU. +SLUDIR=/usr/local/SuperLU_3.0 +SLU=-lslu_lx_ifc8 -L$(SLUDIR) +SLUDEF=-DHave_SLU_ -I$(SLUDIR) +# SLUDEF= +CDEFINES=-DAdd_ $(SLUDEF) + + +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) f90comm$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) f90serial$(.mod) string$(.mod) +PRECMODS = f90prec$(.mod) +ERRORMODS = errormod$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) $(ERRORMODS) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/f90serial$(.mod) $(LIBDIR)/f90comm$(.mod) $(LIBDIR)/string$(.mod)\ + $(LIBDIR)/errormod$(.mod) + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.lahey b/Make.inc.lahey new file mode 100644 index 00000000..56c24ba9 --- /dev/null +++ b/Make.inc.lahey @@ -0,0 +1,52 @@ +# Using Lahey F95 +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=lf95 +FC=lf95 +F77=$(FC) +FCOPT= -O +F90COPT= -O +CC=gcc +CCOPT=-O2 -g -ggdb -pg +F90LINK=mpif90 +FLINK=mpif77 +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacslh -L$(HOME)/LIB + +# +CDEFINES=-DAdd_ +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) f90tools$(.mod) +F90MODS= $(BLASMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/parts.f90 + + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.nagf95 b/Make.inc.nagf95 new file mode 100644 index 00000000..04c2d066 --- /dev/null +++ b/Make.inc.nagf95 @@ -0,0 +1,58 @@ +# Using GNU gfortran (from GCC 3.5.0) +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=/opt/nag/bin/f95 +FC=/opt/nag/bin/f95 +F77=$(FC) +F90COPT=-O3 -mismatch +FCOPT=-O3 -dusty +CC=gcc +CCOPT=-O3 +F90LINK=$(HOME)/mpich-nag/bin/mpif90 +FLINK=$(HOME)/mpich-nag/bin/mpif77 +MPF90=$(F90LINK) -mismatch +MPCC=$(HOME)/mpich-nag/bin/mpicc +# +# +BLAS=-lblasnag -L$(HOME)/LIB +BLACS=-lmpiblacs-nag -L$(HOME)/LIB +# +CDEFINES=-DAdd_ -DHave_SLU_ +SLUDIR=$(HOME)/SuperLU_3.0 +SLUINC=-I$(SLUDIR) +SLU=-lslu_lx_nag -L$(SLUDIR) + +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) f90comm$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) f90serial$(.mod) string$(.mod) +PRECMODS = f90prec$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/f90serial$(.mod) $(LIBDIR)/f90comm$(.mod) $(LIBDIR)/string$(.mod) + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.pgf90 b/Make.inc.pgf90 new file mode 100644 index 00000000..f008e348 --- /dev/null +++ b/Make.inc.pgf90 @@ -0,0 +1,54 @@ +# Using PGI Fortran compilers +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=pgf90 +FC=pgf90 +F77=$(FC) +F90COPT=-fast -g77libs +FCOPT=-fast -g77libs +CC=gcc +CCOPT=-O2 -g -ggdb -pg +F90LINK=/usr/local/mpich-pgi/bin/mpif90 +FLINK=/usr/local/mpich-pgi/bin/mpif77 +MPF90=/usr/local/mpich-pgi/bin/mpif90 +MPCC=/usr/local/mpich-pgi/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacspgi -L$(HOME)/LIB + +# +CDEFINES=-DAdd_ +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) +PRECMODS = f90prec$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(F90COPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(F90COPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.rs6k b/Make.inc.rs6k new file mode 100644 index 00000000..344bf64f --- /dev/null +++ b/Make.inc.rs6k @@ -0,0 +1,50 @@ +# Using XLF +.mod=.mod +.SUFFIXES: .f90 $(.mod) +F90=xlf95 -qsuffix=f=f90 +FC=xlf +F77=$(FC) +FCOPT=-O3 +CC=xlc +CCOPT=-O3 +F90LINK=mpxlf90 +MPF90=mpxlf95 -qsuffix=f=f90 +FLINK=mpxlf77 +MPCC=mpxlc +# +# +BLAS=-lessl +BLACS=-lmpiblacs -L$(HOME)/LIB +# +CDEFINES=-DNoChange +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) f90comm$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) f90serial$(.mod) +PRECMODS = f90prec$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 \ + $(LIBDIR)/f90serial$(.mod) $(LIBDIR)/f90comm$(.mod) + + + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(FC) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(FCOPT) $(INCDIRS) -c $< + + + + diff --git a/Make.inc.vast b/Make.inc.vast new file mode 100644 index 00000000..3c8eade2 --- /dev/null +++ b/Make.inc.vast @@ -0,0 +1,53 @@ +# Using VAST F90 +.mod=.vo +.SUFFIXES: .f90 $(.mod) +F90=/usr/local/VASTF90/f90 +FC=/usr/local/VASTF90/f90 +F77=$(FC) +FCOPT=-O2 -ff90 -g -ggdb -pg +#-march=pentium4 -mfpmath=sse #You may want to use these +CC=gcc +CCOPT=-O2 -g -ggdb -pg +F90LINK=/usr/local/mpich-vast/bin/mpif90 +FLINK=mpif77 +MPF90=/usr/local/mpich-vast/bin/mpif90 +MPCC=/usr/local/mpich-vast/bin/mpicc +# +# +BLAS=-lblas -L$(HOME)/LIB +BLACS=-lmpiblacsvast -L$(HOME)/LIB +# +CDEFINES=-DAdd_ +AR=ar -cur +RANLIB=ranlib + +TYPEMODS = typesp$(.mod) typedesc$(.mod) typeprec$(.mod) realloc$(.mod) +CONSTMODS = tools_const$(.mod) +BLASMODS = $(TYPEMODS) f90psblas$(.mod) +METHDMODS = f90methd$(.mod) +TOOLSMODS = $(CONSTMODS) psimod$(.mod) f90tools$(.mod) +PRECMODS = f90prec$(.mod) +F90MODS= $(BLASMODS) $(PRECMODS) $(METHDMODS) $(TOOLSMODS) f90sparse$(.mod) + +MODS=$(LIBDIR)/tools_const$(.mod) $(LIBDIR)/typesp$(.mod) $(LIBDIR)/realloc$(.mod) \ + $(LIBDIR)/typedesc$(.mod) $(LIBDIR)/typeprec$(.mod) $(LIBDIR)/parts.f90 + + + +# Under Linux/gmake there is a rule interpreting .mod as Modula source! +$(.mod).o: + +.f.o: + $(FC) $(FCOPT) $(INCDIRS) -c $< +.c.o: + $(CC) $(CCOPT) $(INCDIRS) $(CDEFINES) -c $< +.f$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90$(.mod): + $(F90) $(FCOPT) $(INCDIRS) -c $< +.f90.o: + $(F90) $(FCOPT) $(INCDIRS) -c $< + + + + diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..cea20e2f --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +include Make.inc + +lib: + ( [ -d lib ] || mkdir lib) + (cd src; make lib) +clean: + (cd src; make clean) +veryclean: + (cd src; make veryclean) + (cd lib; /bin/rm -f *.a *$(.mod) V*.inc *.pc *.pcl) + diff --git a/README b/README new file mode 100644 index 00000000..6b699c67 --- /dev/null +++ b/README @@ -0,0 +1,143 @@ +This directory contains the PSBLAS library, version 1.0 +The library is described in: +S. Filippone, M. Colajanni +PSBLAS: A library for parallel linear algebra computation on sparse matrices +ACM Trans. on Math. Software, 26(4), Dec. 2000, pp. 527-550. + +PLATFORMS: + +For the F77 compiler, we assume it supports DOUBLE COMPLEX and DO WHILE/ENDDO. +Practically all compilers do nowadays. +The compilation process relies on the choice of an appropriate +Make.inc file; we have tested with AIX XLF, Intel ifc/Linux, Lahey +F95/Linux and Nag f95/Linux. If you succeed in compiling with other +compiler/operating systems please let us know. + +IBM SP2. +The library has been tested on an IBM SP2 with XLC and XLF +compilers, and a version of the BLACS based on MPI. +The rather baroque setting + F90=xlf90 -qsuffix=f=f90 +in Make.inc.rs6k takes care of the f90 extension. +WARNING: xlf 8.1 introduced a performance bug, whereas a Fortan 90 +code calling a Fortan 77 code would incur spurious array copies; +please make sure your system has the PTF xlf 8102 installed. + + +LINUX: + +There finally exist a GNU Fortran 95 implementation: we are using the +development snapshots from GCC 3.5.0 since July 2004, and it appears +to work. This is on its way to become the reference platform. + +We compiled with egcs G77/GCC and mpich-1.2.0, 1.2.1, 1.2.2 and 1.2.4. +With some versions of GCC, g77 chokes on SRC/SERIAL/dcsdp.f; this +problem seems to have disappeared in recent GCC distributions. + + + +Vast F90/Linux and PGI f90/Linux are now obsolete, in that we have a +NULL() initialization for pointers that is outsied strict F90; they +cannot be used any longer. +For the PGI compilers, we used them in conjunction with gcc, NOT +pgcc. Note that with pgi 3.6 we have horrible performance, due to +spurious array copies when calling Fortran 77 codes from Fortran 90; +this is fixed in version 4. + +The Lahey version we got access to (6.0 and 6.1) seems to suffer from +the same extra copies problem; this is most apparent in the matrix +build process. + +For the Intel compilers, we used ifc version 7.0 and 7.1; with version 6.0 +you need to change the way modules are handled, but we recommend to migrate +to the new version anyway. Moreover, with versions prior to 7.1, there +is a strange error in pargen/ppde90: the compiler did not like the +INTERFACE for the dummy argument subroutine PARTS, it wanted an +EXTERNAL specification. Again, please move to 7.1. + + +Testing of NAG f95 versions is still incomplete. + + + +DOCUMENTATION + +See userguidef90.ps. +Please consult the sample programs, especially TEST/pargen/ppde90.f90. + + +OTHER SOFTWARE CREDITS + +We include our modified implementation of some of the Sparker (serial +sparse BLAS) material, e.g. Jagged diagonal, plus a number of +extensions of our own design. The original file spblas.f can be +downloaded from matisa.cc.rl.ac.uk; of course any bugs in our +implementation are our own to fix. The main reference for the serial +sparse BLAS is: +Duff, I., Marrone, M., Radicati, G., and Vittoli, C. +Level 3 basic linear algebra subprograms for sparse matrices: a user +level interface +ACM Trans. Math. Softw., 23(3), 379-401, 1997. + +We have had good results with the METIS library, which can be +obtained from +http://www-users.cs.umn.edu/~karypis/metis/metis/main.html + + + +NEW: + +- Reviewed the interfaces: in the tools section we really need the + POINTER attribute for dense vectors, but not in the computational + routines; taking it out allows more flexibility. Besides, this acts + as a workaround for a bug on Linux/VAST: as of version + "vf90 Personal V3.4N5" the DOT and NRM functions had to be + transformed into subroutines whenever the vectors had the POINTER + attribute; now both subroutine and function versions work. + +- Added more methods: CGS, BiCGSTAB(L), BiCG + +- We now have a new Preconditioner F90 routine; diagonal scaling works + with COO, CSR and JAD; ILU works with JAD, but is more expensive in terms + of memory space + +- We added some extensions to compute multiple DOTs and AMAXs at once; + they can be useful in solving vector equations (e.g. momentum + equation in fluid dynamics). +- Halo data exchange in F90_PSHALO can now be applied to integer data; + +- There is an update capability for cases where the same sparsity + pattern is reused (e.g. multiple time steps over a discretization + mesh which maintains a constant topology) + +- We added a test program to read and solve sparse matrices from files in + Matrix-Market format (for details see http://math.nist.gov/MatrixMarket/) + + + + +TODO: + + +1. Extend the C/F77 interface for character data (some Fortran + compilers don't just pass a pointer and a hidden length!) + +2. Provide more general factorizations. + + + + +The PSBLAS team. + + +Credits: +Salvatore Filippone +Michele Colajanni +Alfredo Buttari +Fabio Cerioni +Stefano Maiolatesi +Dario Pascucci + + + + diff --git a/docs/userguidef90.ps b/docs/userguidef90.ps new file mode 100644 index 00000000..3813d078 --- /dev/null +++ b/docs/userguidef90.ps @@ -0,0 +1,5018 @@ +%!PS-Adobe-2.0 +%%Creator: dvips(k) 5.86 Copyright 1999 Radical Eye Software +%%Title: userguidef90.dvi +%%Pages: 90 +%%PageOrder: Ascend +%%BoundingBox: 0 0 596 842 +%%EndComments +%DVIPSWebPage: (www.radicaleye.com) +%DVIPSCommandLine: dvips -o userguidef90.ps userguidef90.dvi +%DVIPSParameters: dpi=600, compressed +%DVIPSSource: TeX output 2003.10.06:1521 +%%BeginProcSet: texc.pro +%! +/TeXDict 300 dict def TeXDict begin/N{def}def/B{bind def}N/S{exch}N/X{S +N}B/A{dup}B/TR{translate}N/isls false N/vsize 11 72 mul N/hsize 8.5 72 +mul N/landplus90{false}def/@rigin{isls{[0 landplus90{1 -1}{-1 1}ifelse 0 +0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{ +landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize +mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[ +matrix currentmatrix{A A round sub abs 0.00001 lt{round}if}forall round +exch round exch]setmatrix}N/@landscape{/isls true N}B/@manualfeed{ +statusdict/manualfeed true put}B/@copies{/#copies X}B/FMat[1 0 0 -1 0 0] +N/FBB[0 0 0 0]N/nn 0 N/IEn 0 N/ctr 0 N/df-tail{/nn 8 dict N nn begin +/FontType 3 N/FontMatrix fntrx N/FontBBox FBB N string/base X array +/BitMaps X/BuildChar{CharBuilder}N/Encoding IEn N end A{/foo setfont}2 +array copy cvx N load 0 nn put/ctr 0 N[}B/sf 0 N/df{/sf 1 N/fntrx FMat N +df-tail}B/dfs{div/sf X/fntrx[sf 0 0 sf neg 0 0]N df-tail}B/E{pop nn A +definefont setfont}B/Cw{Cd A length 5 sub get}B/Ch{Cd A length 4 sub get +}B/Cx{128 Cd A length 3 sub get sub}B/Cy{Cd A length 2 sub get 127 sub} +B/Cdx{Cd A length 1 sub get}B/Ci{Cd A type/stringtype ne{ctr get/ctr ctr +1 add N}if}B/id 0 N/rw 0 N/rc 0 N/gp 0 N/cp 0 N/G 0 N/CharBuilder{save 3 +1 roll S A/base get 2 index get S/BitMaps get S get/Cd X pop/ctr 0 N Cdx +0 Cx Cy Ch sub Cx Cw add Cy setcachedevice Cw Ch true[1 0 0 -1 -.1 Cx +sub Cy .1 sub]/id Ci N/rw Cw 7 add 8 idiv string N/rc 0 N/gp 0 N/cp 0 N{ +rc 0 ne{rc 1 sub/rc X rw}{G}ifelse}imagemask restore}B/G{{id gp get/gp +gp 1 add N A 18 mod S 18 idiv pl S get exec}loop}B/adv{cp add/cp X}B +/chg{rw cp id gp 4 index getinterval putinterval A gp add/gp X adv}B/nd{ +/cp 0 N rw exit}B/lsh{rw cp 2 copy get A 0 eq{pop 1}{A 255 eq{pop 254}{ +A A add 255 and S 1 and or}ifelse}ifelse put 1 adv}B/rsh{rw cp 2 copy +get A 0 eq{pop 128}{A 255 eq{pop 127}{A 2 idiv S 128 and or}ifelse} +ifelse put 1 adv}B/clr{rw cp 2 index string putinterval adv}B/set{rw cp +fillstr 0 4 index getinterval putinterval adv}B/fillstr 18 string 0 1 17 +{2 copy 255 put pop}for N/pl[{adv 1 chg}{adv 1 chg nd}{1 add chg}{1 add +chg nd}{adv lsh}{adv lsh nd}{adv rsh}{adv rsh nd}{1 add adv}{/rc X nd}{ +1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]A{bind pop} +forall N/D{/cc X A type/stringtype ne{]}if nn/base get cc ctr put nn +/BitMaps get S ctr S sf 1 ne{A A length 1 sub A 2 index S get sf div put +}if put/ctr ctr 1 add N}B/I{cc 1 add D}B/bop{userdict/bop-hook known{ +bop-hook}if/SI save N @rigin 0 0 moveto/V matrix currentmatrix A 1 get A +mul exch 0 get A mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N/eop{ +SI restore userdict/eop-hook known{eop-hook}if showpage}N/@start{ +userdict/start-hook known{start-hook}if pop/VResolution X/Resolution X +1000 div/DVImag X/IEn 256 array N 2 string 0 1 255{IEn S A 360 add 36 4 +index cvrs cvn put}for pop 65781.76 div/vsize X 65781.76 div/hsize X}N +/p{show}N/RMat[1 0 0 -1 0 0]N/BDot 260 string N/Rx 0 N/Ry 0 N/V{}B/RV/v{ +/Ry X/Rx X V}B statusdict begin/product where{pop false[(Display)(NeXT) +(LaserWriter 16/600)]{A length product length le{A length product exch 0 +exch getinterval eq{pop true exit}if}{pop}ifelse}forall}{false}ifelse +end{{gsave TR -.1 .1 TR 1 1 scale Rx Ry false RMat{BDot}imagemask +grestore}}{{gsave TR -.1 .1 TR Rx Ry scale 1 1 false RMat{BDot} +imagemask grestore}}ifelse B/QV{gsave newpath transform round exch round +exch itransform moveto Rx 0 rlineto 0 Ry neg rlineto Rx neg 0 rlineto +fill grestore}B/a{moveto}B/delta 0 N/tail{A/delta X 0 rmoveto}B/M{S p +delta add tail}B/b{S p tail}B/c{-4 M}B/d{-3 M}B/e{-2 M}B/f{-1 M}B/g{0 M} +B/h{1 M}B/i{2 M}B/j{3 M}B/k{4 M}B/w{0 rmoveto}B/l{p -4 w}B/m{p -3 w}B/n{ +p -2 w}B/o{p -1 w}B/q{p 1 w}B/r{p 2 w}B/s{p 3 w}B/t{p 4 w}B/x{0 S +rmoveto}B/y{3 2 roll p a}B/bos{/SS save N}B/eos{SS restore}B end + +%%EndProcSet +TeXDict begin 39158280 55380996 1000 600 600 (userguidef90.dvi) +@start +%DVIPSBitmapFont: Fa cmex10 10 1 +/Fa 1 113 df<1B301B781BF8A2F201F0A2F203E0A2F207C0A2F20F80A2F21F00A21A3E +A262A262A24F5AA24F5AA24F5AA262190FA24FC7FCA2193EA261A261A24E5AA24E5AA24E +5AA24E5AA24EC8FCA2183EA260131001305E13F800014C5A1203D80FFC4B5A121DD838FE +4B5A12F0D8407F4B5A12004DC9FC6D7E173E6D7E5F6D7E5FA26D6C495AA26D6C495AA26D +6C5C1607A26D6C495AA2027F49CAFCA291383F803EA25EEC1FC05EEC0FE0EDE1F0EC07F1 +EDF3E0A26EB45AA26E5BA26E90CBFCA25D157E157C15384D64788353>112 +D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fb cmsy7 7 2 +/Fb 2 50 df0 D<017F157F2601FFE0903803FFC0000701F890 +380FF1F0260F83FC90381F0038261E00FF013C7F001890263F8078130C4890261FC0E07F +007090260FE1C07F0060EB07E3913803F780486DB4C7EA01806E5A157E157F81824B7E00 +60DAF7E0EB0300913801E3F0DBC3F85B6C90260381FC13066C90260F00FE5B001C011E90 +387F803C6C017C90381FE0F82607C7F86DB45A2601FFE0010313C06C6CC86CC7FC391B7C +9942>49 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fc cmmi7 7 14 +/Fc 14 122 df<1238127C12FE12FFA2127F123B1203A31206A3120C1218123812701220 +08127A8614>59 D<903B3FFFF01FFFF8A2D901FCC7EAFE004A5CA2010314015F5CA20107 +14035F5CA2010F14075F5CA2011F140F91B65AA2913880000F013F141F5F91C7FCA24914 +3F94C7FC137EA201FE5C167E5BA2000115FE5E5BA200031401B539C07FFFE0A235287DA7 +36>72 D<000FB712E05A9039800FE007D81E009038C001C05A0038011F1300123000705C +00601501023F148012E0481400A2C74890C7FCA2147EA214FEA25CA21301A25CA21303A2 +5CA21307A25CA2130FA25CA2131F001FB57EA22B287DA727>84 D97 +D +99 D<130E131F5BA2133E131C90C7FCA7EA03E0487EEA0C78EA187C1230A212605B12C0 +A2EA01F0A3485AA2485AA2EBC180EA0F81A2381F0300A213066C5A131CEA07F06C5A1128 +7DA617>105 D<1407EC0F80141FA21500140E91C7FCA7EB03E0EB07F8EB0C3C1318EB30 +3E136013C0A248485AA2C7FCA25CA4495AA4495AA4495AA4495AA21238D87C1FC7FC12FC +133E485AEA70F8EA7FE0EA1F80193380A61B>I<133EEA07FEA2EA007CA213FCA25BA212 +01A25BA21203EC07809038E01FC0EC38600007EB61E014C3EBC187EBC307D80FC613C090 +38CC038001B8C7FC13E0487E13FEEB3F80EB0FC0486C7E1303003E1460A2127EECC0C012 +7CECC18012FC903801E30038F800FE0070137C1B297CA723>I<3B07801FC007E03B0FE0 +7FF01FF83B18F0E0F8783C3B30F1807CE03E903AFB007D801ED860FEEB3F005B49133E00 +C14A133E5B1201A24848495BA35F4848485A1830EE01F0A23C0F8003E003E060A218C093 +3801E180271F0007C013E3933800FF00000E6D48137C341B7D993B>109 +D<3907801FC0390FE07FF03918F0E0F83930F1807CEBFB00D860FE133C5B5B00C1147C5B +1201A248485BA34A5AEA07C01660EC03E0A23A0F8007C0C0A2EDC180913803C300D81F00 +13C7EC01FE000EEB00F8231B7D9929>I<9038F007C03901FC1FF039031E78780006EBE0 +3C90381FC01C000CEB801E14005B0018141F133E1200137E153E137CA213FC157C5B1578 +000114F0A2EC01E0EC03C03903FC07809038FE1F00EBE7FCEBE1F0D807E0C7FCA25BA212 +0FA25B121FEAFFF8A22025809922>112 D<3807803E390FE0FF803818F3C13930F703C0 +EBFE073860FC0F13F8158039C1F0070091C7FC1201A2485AA4485AA4485AA448C8FCA212 +0E1A1B7D991F>114 D<90387C03C03901FF0FF03907079C30390E03B078000CEBF0F800 +1813E1123015F0396007C0E015001200A2495AA449C7FC15301238007C1460EAFC3E15C0 +EAF87E39F06F03803970C70700383F83FE381F01F81D1B7D9926>120 +DI +E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fd cmr7 7 3 +/Fd 3 59 df<140EB3A2B812E0A3C7000EC8FCB3A22B2B7DA333>43 +D<13381378EA01F8121F12FE12E01200B3AB487EB512F8A215267BA521>49 +D<1238127C12FEA3127C12381200AB1238127C12FEA3127C123807197B9813>58 +D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fe cmbx12 14.4 52 +/Fe 52 125 df12 D<151E153E157E15FCEC01F8EC07F0EC0FE0EC1FC01580143F +EC7F0014FE1301495A5C1307495AA2495A133F5C137FA2495AA24890C7FCA25A5BA21207 +A2485AA3121F5BA3123FA25BA3127FA55B12FFB3A3127F7FA5123FA37FA2121FA37F120F +A36C7EA21203A27F7EA26C7FA26D7EA2133F80131F6D7EA26D7E1303806D7E1300147FEC +3F80141F15C0EC0FE0EC07F0EC01F8EC00FC157E153E151E1F7973D934>40 +D<127012F8127C127E7EEA1FC06C7E6C7E12037F6C7E6C7E7F6D7E133F806D7EA26D7E80 +130780A26D7EA26D7EA215807FA215C0A2EC7FE0A315F0143FA315F8A2141FA315FCA514 +0F15FEB3A315FC141FA515F8A3143FA215F0A3147F15E0A3ECFFC0A21580A25B1500A249 +5AA2495AA25C130F5C495AA2495A5C137F49C7FC5B485A485A5B1207485A485A48C8FC12 +7E127C5A12701F7979D934>I45 D<913803FFC0023F13FC91B6 +FC010315C0010F018113F0903A1FFC003FF849486D7E49486D7E49486D7E48496D138048 +496D13C0A24817E04890C813F0A34817F8A24817FC49157FA3007F17FEA600FF17FFB3A5 +007F17FEA6003F17FCA26D15FFA26C17F8A36C17F0A26C6D4913E0A26C6D4913C06C1780 +6E5B6C6D4913006D6C495AD91FFCEB3FF8903A0FFF81FFF06D90B55A01011580D9003F01 +FCC7FC020313C0384F7BCD43>48 D<157815FC14031407141F14FF130F0007B5FCB6FCA2 +147F13F0EAF800C7FCB3B3B3A6007FB712FEA52F4E76CD43>II<91380FFF8091B512F8010314FE010F6E7E4901037F90267FF8007F4948EB3F +F048496D7E484980486F7E484980824817805A91C714C05A7013E0A218F0B5FCA318F8A6 +18FCA46C5DA37EA25E6C7F6C5DA26C5D6C7F6C6D137B6C6D13F390387FF803011FB512E3 +6D14C30103028313F89039007FFE03EC00401500A218F05EA3D801F816E0487E486C16C0 +487E486D491380A218005E5F4C5A91C7FC6C484A5A494A5A49495B6C48495BD803FC010F +5B9027FF807FFEC7FC6C90B55A6C6C14F06D14C0010F49C8FC010013F0364F7ACD43>57 +D<171F4D7E4D7EA24D7EA34C7FA24C7FA34C7FA34C7FA24C7FA34C8083047F80167E8304 +FE804C7E03018116F8830303814C7E03078116E083030F814C7E031F81168083033F8293 +C77E4B82157E8403FE824B800201835D840203834B800207835D844AB87EA24A83A3DA3F +80C88092C97E4A84A2027E8202FE844A82010185A24A820103854A82010785A24A82010F +855C011F717FEBFFFCB600F8020FB712E0A55B547BD366>65 DI<932601FFFCEC01C0047FD9FFC013030307B600F813 +07033F03FE131F92B8EA803F0203DAE003EBC07F020F01FCC7383FF0FF023F01E0EC0FF9 +4A01800203B5FC494848C9FC4901F8824949824949824949824949824990CA7E494883A2 +484983485B1B7F485B481A3FA24849181FA3485B1B0FA25AA298C7FC5CA2B5FCAE7EA280 +A2F307C07EA36C7FA21B0F6C6D1980A26C1A1F6C7F1C006C6D606C6D187EA26D6C606D6D +4C5A6D6D16036D6D4C5A6D6D4C5A6D01FC4C5A6D6DEE7F806D6C6C6C4BC7FC6E01E0EC07 +FE020F01FEEC1FF80203903AFFE001FFF0020091B612C0033F93C8FC030715FCDB007F14 +E0040101FCC9FC525479D261>IIII<932601FFFCEC01C0047FD9FFC0 +13030307B600F81307033F03FE131F92B8EA803F0203DAE003EBC07F020F01FCC7383FF0 +FF023F01E0EC0FF94A01800203B5FC494848C9FC4901F882494982494982494982494982 +4990CA7E494883A2484983485B1B7F485B481A3FA24849181FA3485B1B0FA25AA298C8FC +5CA2B5FCAE6C057FB712E0A280A36C94C7003FEBC000A36C7FA36C7FA27E6C7FA26C7F6C +7FA26D7E6D7F6D7F6D6D5E6D7F6D01FC93B5FC6D13FF6D6C6D5C6E01F0EC07FB020F01FE +EC1FF10203903AFFF001FFE0020091B6EAC07F033FEE001F030703FC1307DB007F02E013 +01040149CAFC5B5479D26A>III76 DII<93380FFFC00303B6FC031F15E092B712 +FC0203D9FC0013FF020F01C0010F13C0023F90C7000313F0DA7FFC02007F494848ED7FFE +4901E0ED1FFF49496F7F49496F7F4990C96C7F49854948707F4948707FA24849717E4886 +4A83481B804A83481BC0A2481BE04A83A2481BF0A348497113F8A5B51AFCAF6C1BF86E5F +A46C1BF0A26E5F6C1BE0A36C6D4D13C0A26C6D4D1380A26C1B006C6D4D5A6E5E6C626D6C +4C5B6D6D4B5B6D6D4B5B6D6D4B5B6D6D4B5B6D6D4B90C7FC6D6D4B5A6D01FF02035B023F +01E0011F13F0020F01FC90B512C0020390B7C8FC020016FC031F15E0030392C9FCDB001F +13E0565479D265>II82 +D<91260FFF80130791B500F85B010702FF5B011FEDC03F49EDF07F9026FFFC006D5A4801 +E0EB0FFD4801800101B5FC4848C87E48488149150F001F824981123F4981007F82A28412 +FF84A27FA26D82A27F7F6D93C7FC14C06C13F014FF15F86CECFF8016FC6CEDFFC017F06C +16FC6C16FF6C17C06C836C836D826D82010F821303010082021F16801400030F15C0ED00 +7F040714E01600173F050F13F08383A200788200F882A3187FA27EA219E07EA26CEFFFC0 +A27F6D4B13806D17006D5D01FC4B5A01FF4B5A02C04A5A02F8EC7FF0903B1FFFC003FFE0 +486C90B65AD8FC0393C7FC48C66C14FC48010F14F048D9007F90C8FC3C5479D24B>I<00 +3FBC1280A59126C0003F9038C0007F49C71607D87FF8060113C001E08449197F49193F90 +C8171FA2007E1A0FA3007C1A07A500FC1BE0481A03A6C994C7FCB3B3AC91B912F0A55351 +7BD05E>III<003FB7D88003B7FCA5 +D8000749C8000701F8C7FC6D6D9238007F806D6E93C8FC7015FE6D17016E6D5D704A5A6E +16076E6D4A5A6E6D5D4F5A6E6D143F6E6D4A5A7191C9FC6E16FE6EECC00171485A6F5D6F +6D485A6FEBF80F71485A6F5D6F6D485AEFFF7F6F4ACAFC6F5C6F5CA2705B705B8482707F +707FA2707F7080855E4C80855E4C80DC3FCF7F058F7FEE7F074C6C7FDB01FE814C7E4B48 +6C8003076E7F4B48814C7F4B486D7F033F824C7F4BC76C7F4B6E7F4A5A4B6E804A486E80 +0207844A48814B6F7F4A4883023F824A486F7F92C96C7F02FE840101830103718090263F +FFC084B76C0103B712F8A55D527CD166>88 DI97 DI<913801FFF8021FEBFF80 +91B612F0010315FC010F9038C00FFE903A1FFE0001FFD97FFC491380D9FFF05B4817C048 +495B5C5A485BA2486F138091C7FC486F1300705A4892C8FC5BA312FFAD127F7FA27EA2EF +03E06C7F17076C6D15C07E6E140F6CEE1F806C6DEC3F006C6D147ED97FFE5C6D6CEB03F8 +010F9038E01FF0010390B55A01001580023F49C7FC020113E033387CB63C>I<4DB47E04 +07B5FCA5EE001F1707B3A4913801FFE0021F13FC91B6FC010315C7010F9038E03FE74990 +380007F7D97FFC0101B5FC49487F4849143F484980485B83485B5A91C8FC5AA3485AA412 +FFAC127FA36C7EA37EA26C7F5F6C6D5C7E6C6D5C6C6D49B5FC6D6C4914E0D93FFED90FEF +EBFF80903A0FFFC07FCF6D90B5128F0101ECFE0FD9003F13F8020301C049C7FC41547CD2 +4B>I<913803FFC0023F13FC49B6FC010715C04901817F903A3FFC007FF849486D7E4948 +6D7E4849130F48496D7E48178048497F18C0488191C7FC4817E0A248815B18F0A212FFA4 +90B8FCA318E049CAFCA6127FA27F7EA218E06CEE01F06E14037E6C6DEC07E0A26C6DEC0F +C06C6D141F6C6DEC3F806D6CECFF00D91FFEEB03FE903A0FFFC03FF8010390B55A010015 +C0021F49C7FC020113F034387CB63D>IIII<137F497E000313E0487FA2487FA76C5BA26C5B +C613806DC7FC90C8FCADEB3FF0B5FCA512017EB3B3A6B612E0A51B547BD325>I108 DII<913801FFE0021F13FE91B612C0010315F0010F9038807FFC90 +3A1FFC000FFED97FF86D6C7E49486D7F48496D7F48496D7F4A147F48834890C86C7EA248 +83A248486F7EA3007F1880A400FF18C0AC007F1880A3003F18006D5DA26C5FA26C5F6E14 +7F6C5F6C6D4A5A6C6D495B6C6D495B6D6C495BD93FFE011F90C7FC903A0FFF807FFC6D90 +B55A010015C0023F91C8FC020113E03A387CB643>I<903A3FF001FFE0B5010F13FE033F +EBFFC092B612F002F301017F913AF7F8007FFE0003D9FFE0EB1FFFC602806D7F92C76C7F +4A824A6E7F4A6E7FA2717FA285187F85A4721380AC1A0060A36118FFA2615F616E4A5BA2 +6E4A5B6E4A5B6F495B6F4990C7FC03F0EBFFFC9126FBFE075B02F8B612E06F1480031F01 +FCC8FC030313C092CBFCB1B612F8A5414D7BB54B>I<90397FE003FEB590380FFF80033F +13E04B13F09238FE1FF89139E1F83FFC0003D9E3E013FEC6ECC07FECE78014EF150014EE +02FEEB3FFC5CEE1FF8EE0FF04A90C7FCA55CB3AAB612FCA52F367CB537>114 +D<903903FFF00F013FEBFE1F90B7FC120348EB003FD80FF81307D81FE0130148487F4980 +127F90C87EA24881A27FA27F01F091C7FC13FCEBFFC06C13FF15F86C14FF16C06C15F06C +816C816C81C681013F1580010F15C01300020714E0EC003F030713F015010078EC007F00 +F8153F161F7E160FA27E17E07E6D141F17C07F6DEC3F8001F8EC7F0001FEEB01FE9039FF +C00FFC6DB55AD8FC1F14E0D8F807148048C601F8C7FC2C387CB635>I<143EA6147EA414 +FEA21301A313031307A2130F131F133F13FF5A000F90B6FCB8FCA426003FFEC8FCB3A9EE +07C0AB011FEC0F8080A26DEC1F0015806DEBC03E6DEBF0FC6DEBFFF86D6C5B021F5B0203 +13802A4D7ECB34>III<007FB500F090387FFFFEA5C66C48C7000F90C7FC6D6CEC07F86D6D +5C6D6D495A6D4B5A6F495A6D6D91C8FC6D6D137E6D6D5B91387FFE014C5A6E6C485A6EEB +8FE06EEBCFC06EEBFF806E91C9FCA26E5B6E5B6F7E6F7EA26F7F834B7F4B7F92B5FCDA01 +FD7F03F87F4A486C7E4A486C7E020F7FDA1FC0804A486C7F4A486C7F02FE6D7F4A6D7F49 +5A49486D7F01076F7E49486E7E49486E7FEBFFF0B500FE49B612C0A542357EB447>120 +DI<001FB8FC1880A3912680007F130001FCC7B5FC01F0495B +495D49495B495B4B5B48C75C5D4B5B5F003E4A90C7FC92B5FC4A5B5E4A5B5CC7485B5E4A +5B5C4A5B93C8FC91B5FC495B5D4949EB0F805B495B5D495B49151F4949140092C7FC495A +485E485B5C485E485B4A5C48495B4815074849495A91C712FFB8FCA37E31357CB43C>I< +C312F8A4850480A286>124 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Ff cmti10 10 51 +/Ff 51 123 df44 D51 D<133C137E13FF5AA313FE13FCEA00701300B2120EEA3F8012 +7F12FFA31300127E123C102477A319>58 D<0007B812FE4817FFA26C17FECCFCAE007FB8 +12C0B912E0A26C17C03816779F40>61 D +65 D<0107B612FCEFFF8018C0903B000FF0001FF04BEB07F81703021F15FC17014B14FE +A2023F1400A24B1301A2147F18FC92C7120318F84A140718F04AEC0FE0EF1FC00101ED3F +80EF7F004AEB01FEEE07F849B612E05F9139F80007F0EE01FC01076E7E177F4AEC3F80A2 +010F16C0171F5CA2131F173F5CA2133FEF7F805C1800017F5D4C5A91C7485A5F49140FEE +1FE0494A5A00014AB45AB748C7FC16F816C037397BB83A>II<0103B612FEEFFFC018F0903B00 +07F8000FF84BEB03FCEF00FE020F157FF03F804B141F19C0021F150F19E05D1807143F19 +F05DA2147FA292C8FCA25C180F5CA2130119E04A151FA2130319C04A153FA20107178018 +7F4A1600A2010F16FEA24A4A5A60011F15034D5A4A5D4D5A013F4B5A173F4A4AC7FC17FC +017FEC03F84C5A91C7EA1FC04949B45A007F90B548C8FCB712F016803C397CB83F>I<01 +07B8FCA3903A000FF000034BEB007F183E141F181E5DA2143FA25D181C147FA292380003 +80A24A130718004A91C7FC5E13015E4A133E167E49B512FEA25EECF8000107147C163C4A +1338A2010F147818E04A13701701011F16C016004A14031880013F150718004A5CA2017F +151E173E91C8123C177C4915FC4C5A4914070001ED7FF0B8FCA25F38397BB838>I<0107 +B712FEA3903A000FF000074B1300187C021F153CA25DA2143FA25D1838147FA292C8FCEE +03804A130718004A91C7FCA201015CA24A131E163E010314FE91B5FC5EA2903807F80016 +7C4A1378A2130FA24A1370A2011F14F0A24A90C8FCA2133FA25CA2137FA291CAFCA25BA2 +5B487EB6FCA337397BB836>I<0103B5D8F80FB512E0A390260007F8C7381FE0004B5DA2 +020F153F615DA2021F157F96C7FC5DA2023F5D605DA2027F14016092C7FCA24A1403605C +A249B7FC60A202FCC712070103150F605CA20107151F605CA2010F153F605CA2011F157F +95C8FC5CA2013F5D5F5CA2017F14015F91C7FC491403007FD9FE01B512F8B55BA243397C +B83E>72 D<0103B512F8A390390007F8005DA2140FA25DA2141FA25DA2143FA25DA2147F +A292C7FCA25CA25CA21301A25CA21303A25CA21307A25CA2130FA25CA2131FA25CA2133F +A25CA2137FA291C8FC497EB6FCA25C25397CB820>I<0207B512F0A391390007FC006F5A +A215075EA3150F5EA3151F5EA3153F5EA3157F93C7FCA35D5DA314015DA314035DA31407 +A25DA2140FA2003F5C5A141F485CA24A5A12FC00E049C8FC14FE00705B495A6C485A381E +0FC06CB4C9FCEA01F82C3B78B82C>I<0103B500F890387FFFE0A21AC090260007F8C738 +0FFC004B15E061020F4BC7FC183E4B5C18F0021F4A5A4D5A4BEB0F804DC8FC023F143C5F +4B5B4C5A027FEB07C04CC9FCED001E5E4A5BED01FCECFE0315070101497E151FECFC7C4B +7E903903FDE07FDAFFC07F1580ED003F49488014F84A131F83130F160F4A801607011F81 +A24A130383133F16014A80A2017F6E7EA291C8FC494A7F007F01FE011F13FCB55CA24339 +7CB840>I<0107B512FCA25E9026000FF8C7FC5D5D141FA25DA2143FA25DA2147FA292C8 +FCA25CA25CA21301A25CA21303A25CA21307A25CA2130F170C4A141CA2011F153C17384A +1478A2013F157017F04A14E01601017F140317C091C71207160F49EC1F80163F4914FF00 +0102071300B8FCA25E2E397BB834>I<902607FFF8923807FFF0614F13E0D9000FEFF000 +4F5AA2021F167FF1EFC0141DDA1CFCEC01CF023C16DF9538039F800238ED071FA20278ED +0E3F97C7FC0270151CA202F04B5AF0707E14E0037E14E0010117FE4D485A02C0EC0380A2 +0103ED0701610280140EA20107ED1C0305385B14006F137049160705E05B010EEC01C0A2 +011E913803800F61011CEC0700A2013C020E131F4C5C1338ED1FB80178163F04F091C8FC +01705CA201F04A5B187E00015DD807F816FEB500C09039007FFFFC151E150E4C397AB84A +>I79 +D<0107B612F817FF1880903B000FF0003FE04BEB0FF0EF03F8141FEF01FC5DA2023F15FE +A25DA2147FEF03FC92C7FCA24A15F817074A15F0EF0FE01301EF1FC04AEC3F80EFFE0001 +034A5AEE0FF091B612C04CC7FCD907F8C9FCA25CA2130FA25CA2131FA25CA2133FA25CA2 +137FA291CAFCA25BA25B1201B512FCA337397BB838>I<0103B612F017FEEFFF80903B00 +07F8003FC04BEB0FF01707020FEC03F8EF01FC5DA2021F15FEA25DA2143FEF03FC5DA202 +7FEC07F818F092C7120F18E04AEC1FC0EF3F004A14FEEE01F80101EC0FE091B6128004FC +C7FC9138FC003F0103EC0F80834A6D7E8301071403A25C83010F14075F5CA2011F140FA2 +5CA2133F161F4AECE007A2017F160F180E91C7FC49020F131C007F01FE153CB5913807F0 +78040313F0CAEAFFE0EF3F80383B7CB83D>82 D<92383FC00E913901FFF01C020713FC91 +391FC07E3C91393F001F7C027CEB0FF84A130749481303495A4948EB01F0A2495AA2011F +15E091C7FCA34915C0A36E90C7FCA2806D7E14FCECFF806D13F015FE6D6D7E6D14E00100 +80023F7F14079138007FFC150F15031501A21500A2167C120EA3001E15FC5EA3003E4A5A +A24B5AA2007F4A5A4B5A6D49C7FC6D133ED8F9F013FC39F8FC03F839F07FFFE0D8E01F13 +8026C003FCC8FC2F3D7ABA2F>I<0007B812E0A25AD9F800EB001F01C049EB07C0485AD9 +00011403121E001C5C003C17801403123800785C00701607140700F01700485CA2140FC7 +92C7FC5DA2141FA25DA2143FA25DA2147FA292C9FCA25CA25CA21301A25CA21303A25CA2 +1307A25CA2130FA25CEB3FF0007FB512F8B6FCA2333971B83B>I<003FB539800FFFFEA3 +26007F80C7EA7F8091C8EA3F00173E49153CA2491538A20001167817705BA2000316F05F +5BA2000715015F5BA2000F15035F5BA2001F150794C7FC5BA2003F5D160E5BA2007F151E +161C90C8FCA2163C4815385A16781670A216F04B5A5E1503007E4A5A4BC8FC150E6C143E +6C6C5B15F0390FC003E03907F01FC00001B5C9FC38007FFCEB1FE0373B70B83E>III<49B5D8F007B5FCA3D9000790C713E0DA03FCEC7F00187C020115786F5C4D5A0200 +5D6F495A4DC7FC6F5BEE801E5F033F5BEEC0705F92381FC1C016E3EEE780DB0FEFC8FC16 +FE6F5A5EA2150382A2150782150F151CED3CFF5D4B7EDA01E07FEDC03FDA03807FEC0700 +020E131F021E805C4A130F0270805C49481307494880130749C71203011E81133E01FE81 +D807FF1407B500E090387FFFFC93B5FC6040397CB83E>II<14F8EB07FE90381F871C90383E03FE137CEBF801120148486C5A48 +5A120FEBC001001F5CA2EA3F801403007F5C1300A21407485C5AA2140F5D48ECC1C0A214 +1F15831680143F1587007C017F1300ECFF076C485B9038038F8E391F0F079E3907FE03FC +3901F000F0222677A42A>97 D<133FEA1FFFA3C67E137EA313FE5BA312015BA312035BA3 +1207EBE0F8EBE7FE9038EF0F80390FFC07C013F89038F003E013E0D81FC013F0A21380A2 +123F1300A214075A127EA2140F12FE4814E0A2141F15C05AEC3F80A215005C147E5C3878 +01F8007C5B383C03E0383E07C0381E1F80D80FFEC7FCEA01F01C3B77B926>I<147F9038 +03FFC090380FC1E090381F0070017E13784913383901F801F83803F003120713E0120FD8 +1FC013F091C7FC485AA2127F90C8FCA35A5AA45AA3153015381578007C14F0007EEB01E0 +003EEB03C0EC0F806CEB3E00380F81F83803FFE0C690C7FC1D2677A426>II<147F +903803FFC090380FC1E090383F00F0017E13785B485A485A485A120F4913F8001F14F038 +3F8001EC07E0EC1F80397F81FF00EBFFF891C7FC90C8FC5A5AA55AA21530007C14381578 +007E14F0003EEB01E0EC03C06CEB0F806CEB3E00380781F83803FFE0C690C7FC1D2677A4 +26>IIIII<150E153F157FA3157E151C1500ABEC1F80 +EC7FC0ECF1F0EB01C090380380F813071401130F130E131EEB1C03133C013813F0A2EB00 +07A215E0A2140FA215C0A2141FA21580A2143FA21500A25CA2147EA214FEA25CA21301A2 +5CA213035C121C387E07E0A238FE0FC05C49C7FCEAF83EEA787CEA3FF0EA0FC0204883B6 +19>IIIII<147F903803 +FFC090380FC1F090381F00F8017E137C5B4848137E4848133E0007143F5B120F485AA248 +5A157F127F90C7FCA215FF5A4814FEA2140115FC5AEC03F8A2EC07F015E0140F007C14C0 +007EEB1F80003EEB3F00147E6C13F8380F83F03803FFC0C648C7FC202677A42A>I<9039 +078007C090391FE03FF090393CF0787C903938F8E03E9038787FC00170497EECFF00D9F0 +FE148013E05CEA01E113C15CA2D80003143FA25CA20107147FA24A1400A2010F5C5E5C4B +5A131F5EEC80035E013F495A6E485A5E6E48C7FC017F133EEC70FC90387E3FF0EC0F8001 +FEC9FCA25BA21201A25BA21203A25B1207B512C0A3293580A42A>I<3903C003F0390FF0 +1FFC391E783C0F381C7C703A3C3EE03F8038383FC0EB7F800078150000701300151CD8F0 +7E90C7FCEAE0FE5BA2120012015BA312035BA312075BA3120F5BA3121F5BA3123F90C9FC +120E212679A423>114 D<14FE903807FF8090380F83C090383E00E04913F00178137001 +F813F00001130313F0A215E00003EB01C06DC7FC7FEBFFC06C13F814FE6C7F6D13807F01 +0F13C01300143F141F140F123E127E00FE1480A348EB1F0012E06C133E00705B6C5B381E +03E06CB45AD801FEC7FC1C267AA422>II<13F8D803FEEB01C0D8078FEB03E0390E0F8007121E121C0038140F131F +007815C01270013F131F00F0130000E015805BD8007E133FA201FE14005B5D120149137E +A215FE120349EBFC0EA20201131E161C15F813E0163CD9F003133814070001ECF0709138 +1EF8F03A00F83C78E090393FF03FC090390FC00F00272679A42D>I<01F0130ED803FC13 +3FD8071EEB7F80EA0E1F121C123C0038143F49131F0070140FA25BD8F07E140000E08013 +FEC6485B150E12015B151E0003141C5BA2153C000714385B5DA35DA24A5A140300035C6D +48C7FC0001130E3800F83CEB7FF8EB0FC0212679A426>I<01F01507D803FC903903801F +80D8071E903907C03FC0D80E1F130F121C123C0038021F131F49EC800F00701607A24913 +3FD8F07E168000E0ED000313FEC64849130718000001147E5B03FE5B0003160E495BA217 +1E00070101141C01E05B173C1738A217781770020314F05F0003010713016D486C485A00 +0190391E7C07802800FC3C3E0FC7FC90393FF81FFE90390FE003F0322679A437>I<9039 +07E007C090391FF81FF89039787C383C9038F03E703A01E01EE0FE3803C01F018013C0D8 +070014FC481480000E1570023F1300001E91C7FC121CA2C75AA2147EA214FEA25CA21301 +A24A1370A2010314F016E0001C5B007E1401010714C000FEEC0380010F1307010EEB0F00 +39781CF81E9038387C3C393FF03FF03907C00FC027267CA427>I<13F0D803FCEB01C0D8 +071EEB03E0D80E1F1307121C123C0038140F4914C01270A249131FD8F07E148012E013FE +C648133F160012015B5D0003147E5BA215FE00075C5BA214015DA314035D14070003130F +EBF01F3901F87FE038007FF7EB1FC7EB000F5DA2141F003F5C48133F92C7FC147E147C00 +7E13FC387001F8EB03E06C485A383C1F80D80FFEC8FCEA03F0233679A428>I<903903C0 +038090380FF007D91FF81300496C5A017F130E9038FFFE1E9038F83FFC3901F007F849C6 +5A495B1401C7485A4A5A4AC7FC141E5C5C5C495A495A495A49C8FC131E5B49131C5B4848 +133C48481338491378000714F8390FF801F0391FFF07E0383E1FFFD83C0F5B00785CD870 +0790C7FC38F003FC38E000F021267BA422>I E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fg cmsy10 10 11 +/Fg 11 121 df<007FB81280B912C0A26C17803204799641>0 D<121C127FEAFF80A5EA +7F00121C0909799917>I3 D20 D<126012F812FEEA7F80EA3FE0EA0FF8EA03FEC66C7EEB3F +E0EB0FF8EB03FE903800FF80EC3FE0EC0FF8EC03FE913800FF80ED3FE0ED0FF8ED03FE92 +3800FF80EE3FE0EE0FF8EE03FE933800FF80EF3FC0171FEF7F80933801FF00EE07FCEE1F +F0EE7FC04B48C7FCED07FCED1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07 +FCEB1FF0EB7FC04848CAFCEA07FCEA1FF0EA7FC048CBFC12FC1270CCFCAE007FB81280B9 +12C0A26C1780324479B441>I<1478A414F85CA213015C1303495AA2495A49CCFC5B137E +5B485A485AEA0FE0003FBA12FEBCFCA2003F19FED80FE0CCFCEA03F06C7E6C7E137E7F7F +6D7E6D7EA26D7E1301801300A2801478A4482C7BAA53>32 D54 D<126012F0B3B3B3B3A91260045377BD17>106 D<0070131C00F0131EB3 +B3B3B3A80070131C175277BD2A>I112 D<137E3801FFC03807C1E0380F0070001E1338003E131C4813 +0C141E147E5AA3143C1400A3127CA37E121E7E6C7E6C7EEA00F013FCEA03FF380F878038 +1F01E0003E13F0EB00F848137CA200FC133E5A141FA6127C143F6C133EA26C137CEA0F80 +000713F83801E1F03800FFC0EB3F00130FEB03C0EB01E0EB00F01478147C143EA3141FA3 +123C127EA3143E127812300038137C6C13786C13F0380783E03803FF8038007E00184C7A +BA25>120 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fh cmmi10 10 50 +/Fh 50 123 df11 DI<121C127FEAFF80A5EA7F00121C0909798817>58 +D<121C127FEAFF80A213C0A3127F121C1200A412011380A2120313005A1206120E5A5A5A +12600A19798817>I +I<126012FCB4FCEA7FC0EA1FF0EA07FCEA01FF38007FC0EB1FF0EB07FCEB01FF9038007F +C0EC1FF0EC07FCEC01FF9138007FC0ED1FF0ED07FCED01FF9238007FC0EE1FF0EE07FCEE +01FF9338007F80EF1FC0A2EF7F80933801FF00EE07FCEE1FF0EE7FC04B48C7FCED07FCED +1FF0ED7FC04A48C8FCEC07FCEC1FF0EC7FC04948C9FCEB07FCEB1FF0EB7FC04848CAFCEA +07FCEA3FF0EA7FC048CBFC12FC1270323279AD41>62 D<1760177017F01601A21603A216 +07160FA24C7EA216331673166316C3A2ED0183A2ED0303150683150C160115181530A215 +60A215C014011580DA03007FA202061300140E140C5C021FB5FC5CA20260C7FC5C83495A +8349C8FC1306A25BA25B13385B01F01680487E000716FFB56C013F13FF5EA2383C7DBB3E +>65 D<0103B77E4916F018FC903B0007F80003FE4BEB00FFF07F80020FED3FC0181F4B15 +E0A2141FA25DA2143F19C04B143F1980027F157F190092C812FE4D5A4A4A5AEF0FF04AEC +1FC005FFC7FC49B612FC5F02FCC7B4FCEF3FC00103ED0FE0717E5C717E1307844A1401A2 +130F17035CA2131F4D5A5C4D5A133F4D5A4A4A5A4D5A017F4BC7FC4C5A91C7EA07FC49EC +3FF0B812C094C8FC16F83B397DB83F>I<9339FF8001C0030F13E0037F9038F80380913A +01FF807E07913A07F8000F0FDA1FE0EB079FDA3F80903803BF0002FFC76CB4FCD901FC80 +495A4948157E495A495A4948153E017F163C49C9FC5B1201484816385B1207485A183012 +1F4993C7FCA2485AA3127F5BA312FF90CCFCA41703A25F1706A26C160E170C171C5F6C7E +5F001F5E6D4A5A6C6C4A5A16076C6C020EC8FC6C6C143C6C6C5C6CB4495A90393FE00FC0 +010FB5C9FC010313FC9038007FC03A3D7CBA3B>I<0103B7FC4916E018F8903B0007F800 +07FE4BEB00FFF03F80020FED1FC0180F4B15E0F007F0021F1503A24B15F81801143F19FC +5DA2147FA292C8FCA25C18035CA2130119F84A1507A2130319F04A150FA2010717E0181F +4A16C0A2010FEE3F80A24AED7F00187E011F16FE4D5A4A5D4D5A013F4B5A4D5A4A4A5A05 +7FC7FC017F15FEEE03FC91C7EA0FF049EC7FC0B8C8FC16FC16C03E397DB845>I<0103B8 +12F05BA290260007F8C7123F4B1407F003E0020F150118005DA2141FA25D19C0143FA24B +1330A2027F1470190092C7126017E05C16014A495A160F49B6FCA25F9138FC000F010314 +07A24A6DC8FCA201075C18034A130660010F160693C7FC4A150E180C011F161C18184A15 +38A2013F5E18F04A4A5AA2017F15074D5A91C8123F49913803FF80B9FCA295C7FC3C397D +B83D>I<0103B812E05BA290260007F8C7123F4B140FF003C0140F18015DA2141FA25D19 +80143FA25D1760027F14E095C7FC92C75AA24A1301A24A495A16070101141F91B6FC94C8 +FCA2903903FC001F824A130EA21307A24A130CA2010F141CA24A90C9FCA2131FA25CA213 +3FA25CA2137FA291CBFC497EB612C0A33B397DB835>I<0103B5D8F803B512F8495DA290 +260007F8C73807F8004B5DA2020F150F615DA2021F151F615DA2023F153F615DA2027F15 +7F96C7FC92C8FCA24A5D605CA249B7FC60A202FCC7120101031503605CA201071507605C +A2010F150F605CA2011F151F605CA2013F153F605CA2017F157F95C8FC91C8FC496C4A7E +B690B6FCA345397DB845>72 D<0107B512FCA216F890390007F8005DA2140FA25DA2141F +A25DA2143FA25DA2147FA292C7FCA25CA25CA21301A25CA21303A25CA21307A25CA2130F +A25CA2131FA25CA2133FA25CA2137FA291C8FC497EB6FCA326397DB824>I<0203B512FC +A3DA000113006F5AA215015EA315035EA315075EA3150F5EA3151F5EA3153F5EA3157F93 +C7FCA35D5DA31401A25DA21403120FD83F805B127FEBC007D8FF805BA24A5AEB001F00FC +5C00E0495A006049C8FC007013FE383801F8381E07F03807FFC0D801FEC9FC2E3B7AB82E +>I<0103B500F8903807FFFC5BA290260007F8C813804BEDFC0019F0020F4B5AF003804B +4AC7FC180E021F1538604B5CEF0380023F4AC8FC170E4B133C1770027F5C4C5ADB0007C9 +FC160E4A5B167E4A13FE4B7E01015B92380E7F80ECFC1CED383F010301E07FECFDC04A48 +6C7EECFF00D907FC6D7E5C4A130783130F707E5C1601011F81A24A6D7EA2013F6F7EA24A +143F84137F717E91C8123F496C81B60107B512C0A26146397DB847>I<0103B6FC5B5E90 +260007FCC8FC5D5D140FA25DA2141FA25DA2143FA25DA2147FA292C9FCA25CA25CA21301 +A25CA21303A25CA2130718404A15C0A2010F150118804A1403A2011F16005F4A1406170E +013F151E171C4A143C177C017F5D160391C7120F49EC7FF0B8FCA25F32397DB839>I<90 +2603FFF893383FFF80496081D900079438FF80000206DC01BFC7FCA2020E4C5A1A7E020C +1606190CDA1C7E16FE4F5A02181630A20238166162023016C1F00181DA703F1583953803 +03F002601506A202E0ED0C076202C01518183001016D6C140F06605B028015C0A2010392 +3801801FDD03005B140092380FC00649173F4D91C8FC01065DA2010E4B5B4D137E130C6F +6C5A011C17FEDCE1805B011802E3C7FCA2013802E6130104EC5C1330ED03F8017016034C +5C01F05CD807FC4C7EB500E0D9C007B512F01680150151397CB851>I<902603FFF89138 +1FFFF8496D5CA2D90007030113006FEC007C02061678DA0EFF157081020C6D1460A2DA1C +3F15E0705CEC181F82023815016F6C5C1430150702706D1303030392C7FC02607FA2DAE0 +015C701306ECC0008201016E130EEF800C5C163F0103EDC01C041F131891C713E0160F49 +EDF03818300106140717F8010E02031370EFFC60130CEE01FE011C16E004005B011815FF +177F1338600130153FA20170151F95C8FC01F081EA07FCB512E01706A245397DB843>I< +4BB4FC031F13F09238FE01FC913903F0007EDA07C0EB1F80DA1F80EB0FC0023EC7EA07E0 +02FCEC03F0495A4948EC01F8495A4948EC00FC495A49C912FE49167E13FE49167F120148 +5AA2485AA2120F5B001F17FFA2485AA34848ED01FEA400FFEE03FC90C9FCA2EF07F8A2EF +0FF0A218E0171F18C0EF3F806C167F180017FE4C5A6C6C5D1603001F4B5A6D4A5A000FED +1F806C6C4AC7FC6D147E0003EC01F8D801FC495AD8007EEB0FC090263F807FC8FC903807 +FFF801001380383D7CBA3F>I<0103B7FC4916E018F8903B0007F80007FC4BEB00FE187F +020FED3F80F01FC05DA2021F16E0A25DA2143FF03FC05DA2027FED7F80A292C8130018FE +4A4A5A604AEC07F04D5A0101ED3FC04CB4C7FC91B612FC17E0D903FCCAFCA25CA21307A2 +5CA2130FA25CA2131FA25CA2133FA25CA2137FA291CBFC497EB6FCA33B397DB835>I<4B +B4FC031F13F09238FE01FC913903F0007EDA07C0EB1F80DA1F80EB0FC0023EC7EA07E002 +FCEC03F0495A4948EC01F8495A4948EC00FC495A013F16FE49C9FC13FE187F485A12035B +12075B120F4916FF121FA2485AA34848ED01FEA448C9EA03FCA3EF07F8A218F0170F18E0 +171F18C0EF3F807EEF7F0017FEDA07C05B6C90391FF001F8903980383803001F496C485A +9139E00C0FE0260FC0C0EB1F80D807E1D90E3FC7FC0280137ED803F1EB07F8D801F95C3A +007FC00FC0903A3FE07F0003903807FFFE0100018F5BDA000F1306170E171E705A177CEE +C1F816FF5FA25F5F6F5B6F48C7FCED00F8384B7CBA42>I<0003B812FEA25A903AF8003F +C00101C0913880007E4848163C90C7007F141C121E001C92C7FCA2485CA200305C007017 +180060130112E0485CA21403C716005DA21407A25DA2140FA25DA2141FA25DA2143FA25D +A2147FA292C9FCA25CA25CA21301A25CA21303A25CEB0FFC003FB6FC5AA237397EB831> +84 D<003FB56C48B51280485DA226007F80C7381FF00091C8EA07C0604993C7FCA24915 +06A20001160E170C5BA20003161C17185BA20007163817305BA2000F167017605BA2001F +16E05F5BA2003F15015F5BA2007F150394C8FC90C8FCA25E4815065A160E160C161C1618 +16385E127E5E4B5A6C4A5A4BC9FC6C6C131E6C6C5B6C6C13F83903F807E06CB55A6C6C48 +CAFCEB0FF0393B7BB839>I<49B500F890387FFFF095B5FC1AE0D90003018090380FFC00 +4BC713E00201ED07804EC7FC6E6C140E606F5C705B606F6C485A4D5A031F91C8FCEEE006 +5F6F6C5A5F03075B705A16F96FB45A94C9FC6F5AA36F7EA34B7FED037F9238063FC0150E +4B6C7E1538ED700F03E07F15C04A486C7EEC0300020613034A805C4A6D7E14704A130049 +4880495A49C86C7E130E011E153F017E4B7ED803FF4B7E007F01E0011FEBFFC0B5FC6144 +397EB845>88 DI<147E903803FF8090390FC1C3 +8090391F00EFC0017E137F49133F485A4848EB1F8012075B000F143F48481400A2485A5D +007F147E90C7FCA215FE485C5AA214015D48150CA21403EDF01C16181407007C1538007E +010F1330003E131F027B13706C01E113E03A0F83C0F9C03A03FF007F80D800FCEB1F0026 +267DA42C>97 D<133FEA1FFFA3C67E137EA313FE5BA312015BA312035BA31207EBE0FCEB +E3FF9038E707C0390FFE03E09038F801F001F013F8EBE000485A15FC5BA2123F90C7FCA2 +14015A127EA2140312FE4814F8A2140715F05AEC0FE0A215C0EC1F80143F00781400007C +137E5C383C01F86C485A380F07C06CB4C7FCEA01FC1E3B7CB924>II<163FED1FFFA3ED +007F167EA216FEA216FCA21501A216F8A21503A216F0A21507A2027E13E0903803FF8790 +380FC1CF90381F00EF017EEB7FC049133F485A4848131F000715805B000F143F485A1600 +485A5D127F90C7127EA215FE5A485CA21401A248ECF80CA21403161CEDF0181407007C15 +38007E010F1330003E131F027B13706C01E113E03A0F83C0F9C03A03FF007F80D800FCEB +1F00283B7DB92B>II103 +DI<14 +E0EB03F8A21307A314F0EB01C090C7FCAB13F8EA03FEEA070F000E1380121C1218123812 +30EA701F1260133F00E0130012C05BEA007EA213FE5B1201A25B12035BA20007131813E0 +1438000F133013C01470EB806014E014C01381EB838038078700EA03FEEA00F815397EB7 +1D>I<150FED3F80A2157FA31600151C92C7FCABEC0F80EC3FE0ECF0F0903801C0F84948 +7E14005B130E130C131CEB1801133801305BA2EB0003A25DA21407A25DA2140FA25DA214 +1FA25DA2143FA292C7FCA25CA2147EA214FEA25CA21301001E5B123F387F83F0A238FF87 +E0495A00FE5BD87C1FC8FCEA707EEA3FF8EA0FC0214981B722>IIIII +I<90390F8003F090391FE00FFC903939F03C1F903A70F8700F80903AE0FDE007C09038C0 +FF80030013E00001491303018015F05CEA038113015CA2D800031407A25CA20107140FA2 +4A14E0A2010F141F17C05CEE3F80131FEE7F004A137E16FE013F5C6E485A4B5A6E485A90 +397F700F80DA383FC7FC90387E1FFCEC07E001FEC9FCA25BA21201A25BA21203A25B1207 +B512C0A32C3583A42A>I<3903E001F83907F807FE390E3C1E07391C3E381F3A183F703F +800038EBE07F0030EBC0FF00705B00601500EC007E153CD8E07F90C7FCEAC07EA2120013 +FE5BA312015BA312035BA312075BA3120F5BA3121F5B0007C9FC21267EA425>114 +D<14FF010313C090380F80F090383E00380178131C153C4913FC0001130113E0A33903F0 +00F06D13007F3801FFE014FC14FF6C14806D13C0011F13E013039038003FF01407140300 +1E1301127FA24814E0A348EB03C012F800E0EB07800070EB0F006C133E001E13F83807FF +E0000190C7FC1E267CA427>II<13F8D803FE1438D8070F147C000E6D13FC121C1218003814011230D8701F5C1260 +1503EAE03F00C001005B5BD8007E1307A201FE5C5B150F1201495CA2151F120349EC80C0 +A2153F1681EE0180A2ED7F0303FF130012014A5B3A00F8079F0E90397C0E0F1C90393FFC +07F8903907F001F02A267EA430>I<01F8EB03C0D803FEEB07E0D8070F130F000E018013 +F0121C12180038140700301403D8701F130112601500D8E03F14E000C090C7FC5BEA007E +16C013FE5B1501000115805B150316001203495B1506150E150C151C151815385D00015C +6D485A6C6C485AD97E0FC7FCEB1FFEEB07F024267EA428>I<01F816F0D803FE9138E001 +F8D8070F903801F003000ED9800314FC121C12180038020713010030EDE000D8701F167C +1260030F143CD8E03F163800C001005B5BD8007E131F183001FE5C5B033F147000011760 +4991C7FCA218E000034A14C049137E17011880170318005F03FE1306170E000101015C01 +F801BF5B3B00FC039F8070903A7E0F0FC0E0903A1FFC03FFC0902703F0007FC7FC36267E +A43B>I<903907E001F090391FF807FC9039783E0E0F9039E01F1C1FD801C09038383F80 +3A03800FF07F0100EBE0FF5A000E4A1300000C157E021F133C001C4AC7FC1218A2C7123F +A292C8FCA25CA2147EA214FEA24A130CA20101141C001E1518003F5BD87F81143801835C +00FF1560010714E03AFE0E7C01C0D87C1C495A2778383E0FC7FC391FF00FFC3907C003F0 +29267EA42F>I<13F8D803FE1470D8070F14F8000EEB8001121C121800381403003015F0 +EA701F1260013F130700E0010013E012C05BD8007E130F16C013FE5B151F000115805BA2 +153F000315005BA25D157EA315FE5D1401000113033800F80790387C1FF8EB3FF9EB0FE1 +EB00035DA2000E1307D83F805B007F495AA24A5A92C7FCEB003E007C5B00705B6C485A38 +1E07C06CB4C8FCEA01FC25367EA429>II E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fi cmbx12 12 16 +/Fi 16 121 df67 +D83 D<903801FFE0011F13FE017F +6D7E48B612E03A03FE007FF84848EB1FFC6D6D7E486C6D7EA26F7FA36F7F6C5A6C5AEA00 +F090C7FCA40203B5FC91B6FC1307013F13F19038FFFC01000313E0000F1380381FFE0048 +5A5B127F5B12FF5BA35DA26D5B6C6C5B4B13F0D83FFE013EEBFFC03A1FFF80FC7F0007EB +FFF86CECE01FC66CEB8007D90FFCC9FC322F7DAD36>97 D99 D101 D<137C48B4FC4813804813C0A24813E0A56C13C0A26C13806C1300EA007C90 +C7FCAAEB7FC0EA7FFFA512037EB3AFB6FCA518467CC520>105 D<90277F8007FEEC0FFC +B590263FFFC090387FFF8092B5D8F001B512E002816E4880913D87F01FFC0FE03FF8913D +8FC00FFE1F801FFC0003D99F009026FF3E007F6C019E6D013C130F02BC5D02F86D496D7E +A24A5D4A5DA34A5DB3A7B60081B60003B512FEA5572D7CAC5E>109 +D<90397F8007FEB590383FFF8092B512E0028114F8913987F03FFC91388F801F00039039 +9F000FFE6C139E14BC02F86D7E5CA25CA35CB3A7B60083B512FEA5372D7CAC3E>II<90397FC00FF8B590B57E02C314 +E002CF14F89139DFC03FFC9139FF001FFE000301FCEB07FF6C496D13804A15C04A6D13E0 +5C7013F0A2EF7FF8A4EF3FFCACEF7FF8A318F017FFA24C13E06E15C06E5B6E4913806E49 +13006E495A9139DFC07FFC02CFB512F002C314C002C091C7FCED1FF092C9FCADB67EA536 +407DAC3E>I<90387F807FB53881FFE0028313F0028F13F8ED8FFC91389F1FFE000313BE +6C13BC14F8A214F0ED0FFC9138E007F8ED01E092C7FCA35CB3A5B612E0A5272D7DAC2E> +114 D<90391FFC038090B51287000314FF120F381FF003383FC00049133F48C7121F127E +00FE140FA215077EA27F01E090C7FC13FE387FFFF014FF6C14C015F06C14FC6C80000380 +6C15806C7E010F14C0EB003F020313E0140000F0143FA26C141F150FA27EA26C15C06C14 +1FA26DEB3F8001E0EB7F009038F803FE90B55A00FC5CD8F03F13E026E007FEC7FC232F7C +AD2C>IIII120 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fj cmtt10 10 70 +/Fj 70 123 df37 +D39 D<143814FC13011303EB07F8EB0F +F0EB1FC0EB3F80EB7F0013FE485A485A5B12075B120F5B485AA2123F90C7FCA25A127EA3 +12FE5AAC7E127EA3127F7EA27F121FA26C7E7F12077F12037F6C7E6C7E137FEB3F80EB1F +C0EB0FF0EB07F8EB03FC130113001438164272B92C>I<127012FC7E7E6C7E6C7EEA0FE0 +6C7E6C7E6C7E6C7E137F7F1480131F14C0130FEB07E0A214F01303A214F81301A314FC13 +00AC130114F8A3130314F0A2130714E0A2EB0FC0131F1480133F14005B13FE485A485A48 +5A485AEA3FC0485A48C7FC5A5A1270164279B92C>II<147814FCAF007FB612F0B712F8A46C15F0C700FCC7FC +AF147825267DAB2C>II<007FB6FCB712 +80A46C150021067B9B2C>I<121FEA3F80EA7FC0EAFFE0A5EA7FC0EA3F80EA1F000B0B70 +8A2C>I<1507ED0F80151FA2153F16005D157E15FE5D14015D14035DA214075D140F5D14 +1F5D143F92C7FC5C147E14FE5CA213015C13035C13075C130F5C131F5CA2133F91C8FC5B +137E13FE5B12015B12035B12075BA2120F5B121F5B123F90C9FC5A127E12FE5AA25A1278 +21417BB92C>II<1307497EA2131FA2 +133F137F13FF5A1207127FB5FC13DF139FEA7C1F1200B3AE007FB512E0B612F0A36C14E0 +1C3477B32C>II<000FB512FE4880A35D0180C8FCADEB83FE90389FFF8090B512E015F88190 +38FE03FE9038F000FF01C07F49EB3F8090C7121F6C15C0C8120FA2ED07E0A4123C127EB4 +FC150F16C0A248141F007EEC3F80007FEC7F006C6C5B6D485A391FF80FFC6CB55A6C5C00 +0114C06C6C90C7FCEB0FF823347CB22C>53 D57 +D<121FEA3F80EA7FC0EAFFE0A5EA7FC0EA3F80EA1F00C7FCAE121FEA3F80EA7FC0EAFFE0 +A5EA7FC0EA3F80EA1F000B2470A32C>I<007FB612F0B712F8A4003F15F0CAFCA8003FB6 +12F0B712F8A46C15F025147DA22C>61 D<127012FC7E6C7E13E06C7EEA1FFC6C7E3803FF +80C67FEB7FF0EB1FF8EB0FFEEB03FF6D13C06D6C7EEC3FF8EC0FFC6EB4FC0201138080A2 +5C02071300EC0FFCEC3FF8EC7FE049485A4990C7FCEB0FFEEB1FF8EB7FF0EBFFC000035B +D80FFEC8FC485AEA7FF0485A138048C9FC5A1270212A7BAD2C>II<14FE497EA4497FA214EFA2130781A2 +14C7A2010F7FA314C390381F83F0A590383F01F8A490387E00FCA549137E90B512FEA348 +80A29038F8003FA34848EB1F80A4000715C049130FD87FFEEBFFFC6D5AB514FE6C15FC49 +7E27347EB32C>65 D<007FB512E015F8B612FE6C8016C03903F0003FED0FE0ED07F01503 +A2ED01F8A6ED03F0A21507ED0FE0ED1FC0EDFF8090B612005D5D15FF16C09039F0001FE0 +ED07F0ED03F81501ED00FCA216FE167EA616FE16FC1501ED03F8150FED3FF0007FB612E0 +16C0B712806CECFE0015F027337FB22C>I<02FF13700107EBE0F84913F9013F13FD4913 +FFEBFF813901FE007F4848131FD807F0130F1507485A491303485A150148C7FCA25A007E +EC00F01600A212FE5AAB7E127EA3007F15F06CEC01F8A26C7EA26C6C13036D14F06C6C13 +0716E0D803FC131F6C6CEB3FC03A00FF81FF806DB512006D5B010F5B6D13F00100138025 +357DB32C>I<007FB5FCB612C015F0816C803907E003FEEC00FFED7F80153FED1FC0ED0F +E0A2150716F0150316F81501A4ED00FCACED01F8A3150316F0A2150716E0150FED1FC015 +3FED7F80EDFF00EC03FE007FB55AB65A5D15C06C91C7FC26337EB22C>I<007FB612F0B7 +12F8A37E3903F00001A7ED00F01600A4EC01E04A7EA490B5FCA5EBF003A46E5A91C8FCA5 +163C167EA8007FB612FEB7FCA36C15FC27337EB22C>I<007FB612F8B712FCA37ED803F0 +C7FCA716781600A515F04A7EA490B5FCA5EBF001A46E5A92C7FCAD387FFFE0B5FC805C7E +26337EB22C>I<903901FC038090390FFF87C04913EF017F13FF90B6FC4813073803FC01 +497E4848137F4848133F49131F121F5B003F140F90C7FCA2127EED078092C7FCA212FE5A +A8913803FFF84A13FCA27E007E6D13F89138000FC0A36C141FA27F121F6D133F120F6D13 +7F6C7E6C6C13FF6D5A3801FF076C90B5FC6D13EF011F13CF6DEB0780D901FCC7FC26357D +B32C>II<007FB512F8B612FCA36C14F8 +39000FC000B3B3A5007FB512F8B612FCA36C14F81E3379B22C>I<0107B512804914C0A3 +6D148090390003F000B3AF1218127EA2B4FCA24A5A48130F007F131F9038C07FC06CB55A +6C91C7FC6C5B000313F838007FC022347BB22C>II<387FFFE0B57EA36C5BD803F0C8FCB3AE16F0ED01F8A8007FB6FC +B7FCA36C15F025337DB22C>IIII<007FB512C0B612F88115FF6C158026 +03F00013C0153FED0FE0ED07F0A2150316F81501A6150316F01507A2ED0FE0ED3FC015FF +90B61280160015FC5D15C001F0C8FCB0387FFF80B57EA36C5B25337EB22C>I +I<387FFFFCB67E15E015F86C803907E007FE1401EC007F6F7E151FA26F7EA64B5AA2153F +4BC7FCEC01FE140790B55A5D15E081819038E007FCEC01FE1400157F81A8160FEE1F80A5 +D87FFEEB1FBFB5ECFF00815E6C486D5AC8EA01F029347EB22C>I<90381FF80790B5EA0F +804814CF000714FF5A381FF01F383FC003497E48C7FC007E147F00FE143F5A151FA46CEC +0F00007E91C7FC127F7FEA3FE0EA1FFCEBFFC06C13FC0003EBFFC06C14F06C6C7F01077F +9038007FFEEC07FF02001380153FED1FC0A2ED0FE0A20078140712FCA56CEC0FC0A26CEC +1F806D133F01E0EB7F009038FE01FF90B55A5D00F914F0D8F83F13C0D8700790C7FC2335 +7CB32C>I<007FB612FCB712FEA43AFC007E007EA70078153CC71400B3AF90383FFFFCA2 +497F6D5BA227337EB22C>I<3B7FFF803FFFC0B56C4813E0A36C496C13C03B03F00001F8 +00B3AF6D130300015DA26D130700005D6D130F017F495A6D6C485AECE0FF6DB5C7FC6D5B +010313F86D5B9038003F802B3480B22C>III<3A3FFF03FFE0484913F0148714076C6D13E03A01F800FE007F000049 +5A13FE017E5BEB7F03013F5B1487011F5B14CF010F5B14FF6D5BA26D90C7FCA26D5AA26D +5AA2497EA2497EA2497F81EB0FCF81EB1FC7EC87F0EB3F83EC03F8EB7F01017E7FEBFE00 +497F0001147E49137F000380491480151FD87FFEEBFFFC6D5AB514FE6C15FC497E27337E +B22C>II<007FB6FCB71280A46C150021067B7D2C>95 D<3801FFF0000713FE001F6D7E15E048 +809038C01FF81407EC01FC381F80000006C77EC8127EA3ECFFFE131F90B5FC1203120F48 +EB807E383FF800EA7FC090C7FC12FE5AA47E007F14FEEB8003383FE01F6CB612FC6C15FE +6C14BF0001EBFE1F3A003FF007FC27247CA32C>97 DI<903803FFE0011F13F8017F +13FE48B5FC48804848C6FCEA0FF0485A49137E4848131890C9FC5A127EA25AA8127EA212 +7F6C140F6DEB1F806C7E6D133F6C6CEB7F003907FE03FF6CB55A6C5C6C6C5B011F13E001 +0390C7FC21247AA32C>IIIIII<1307EB1FC0A2497EA36D5AA20107C7FC90 +C8FCA7387FFFC080B5FC7EA2EA0007B3A8007FB512FCB612FEA36C14FC1F3479B32C>I< +140EEC3F80A2EC7FC0A3EC3F80A2EC0E0091C7FCA748B512804814C0A37EC7120FB3B3A2 +141F003C1480007E133FB414005CEB01FEEBFFFC6C5B5C001F5B000790C7FC1A467CB32C +>II<387FFFE0B57EA37EEA0003B3B3A5007FB61280B712C0A36C158022337BB22C>I<3A +7F83F007E09039CFFC1FF83AFFDFFE3FFCD87FFF13FF91B57E3A07FE1FFC3E01FCEBF83F +496C487E01F013E001E013C0A301C01380B33B7FFC3FF87FF0027F13FFD8FFFE6D13F8D8 +7FFC4913F0023F137F2D2481A32C>I<397FF01FE039FFF87FFC9038F9FFFE01FB7F6CB6 +FC00019038F03F80ECC01F02807FEC000F5B5BA25BB3267FFFE0B5FCB500F11480A36C01 +E0140029247FA32C>II<397FF01FE039FFF8FFF801FB13FE90B6FC6C1580000190 +38F07FC09138801FE091380007F049EB03F85BED01FC491300A216FE167EA816FE6D14FC +A2ED01F86D13036DEB07F0150F9138801FE09138E07FC091B51280160001FB5B01F813F8 +EC3FC091C8FCAD387FFFE0B57EA36C5B27367FA32C>I114 D<90387FF8700003B512F8120F5A5A387FC0 +0F387E00034813015AA36CEB00F0007F140013F0383FFFC06C13FE6CEBFF80000314E0C6 +6C13F8010113FCEB0007EC00FE0078147F00FC143F151F7EA26C143F6D133E6D13FE9038 +F007FC90B5FC15F815E000F8148039701FFC0020247AA32C>I<131E133FA9007FB6FCB7 +1280A36C1500D8003FC8FCB1ED03C0ED07E0A5EC800F011FEB1FC0ECE07F6DB512801600 +01035B6D13F89038003FE0232E7EAD2C>I<3A7FF003FF80486C487FA3007F7F0001EB00 +0FB3A3151FA2153F6D137F3900FE03FF90B7FC6D15807F6D13CF902603FE07130029247F +A32C>I<3A7FFF01FFFCB514FE148314016C15FC3A03E0000F80A26D131F00011500A26D +5B0000143EA26D137E017C137CA2017E13FC013E5BA2EB3F01011F5BA21483010F5BA214 +C701075BA214EF01035BA214FF6D90C7FCA26D5A147C27247EA32C>II<3A3FFF03FFF048018713F8A36C010313F03A00FC007E +005D90387E01F8013F5BEB1F83EC87E090380FCFC0903807EF80EB03FF6D90C7FC5C6D5A +147C14FE130180903803EF80903807CFC0EB0FC7EC83E090381F01F0013F7FEB7E00017C +137C49137E0001803A7FFF01FFFC1483B514FE6C15FC140127247EA32C>I<3A7FFF01FF +FCB5008113FE148314816C010113FC3A03E0000F806C7E151F6D140012005D6D133E137C +017E137E013E137CA2013F13FC6D5BA2EB0F815DA2EB07C1ECC3E0A2EB03E3ECE7C01301 +14F75DEB00FFA292C7FC80A2143EA2147E147CA214FC5CA2EA0C01003F5BEA7F83EB87E0 +EA7E0F495A387FFF806C90C8FC6C5A6C5AEA07E027367EA32C>I<003FB612E04815F0A4 +007EC7EA1FE0ED3FC0ED7F80EDFF004A5A003C495AC7485A4A5A4A5A4A5A4A5A4AC7FCEB +01FC495AEB0FF0495A495A495A49C8FC4848EB01E04848EB03F0485A485A485A485A485A +B7FCA46C15E024247DA32C>I E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fk cmbx12 20.74 11 +/Fk 11 117 df49 +D<92380FFFE04AB67E020F15F0027F15FE49B87E4917E0010F17F8013F8349D9C01F14FF +9027FFFC0001814801E06D6C80480180021F804890C86C8048486F8048486F8001FF6F80 +4801C06E8002F081486D18806E816E18C0B5821BE06E81A37214F0A56C5BA36C5B6C5B6C +5B000313C0C690C9FC90CA15E060A34E14C0A21B80601B0060626295B55A5F624D5C624D +5C4D91C7FC614D5B4D13F04D5B6194B55A4C49C8FC4C5B4C5B4C13E04C5B604C90C9FCEE +7FFC4C5A4B5B4B5B4B0180EC0FF04B90C8FC4B5A4B5A4B48ED1FE0EDFFE04A5B4A5B4A90 +C9FC4A48163F4A5ADA3FF017C05D4A48167F4A5A4990CA12FFD903FC160749BAFC5B4919 +805B5B90BBFC5A5A5A5A481A005A5ABCFCA462A44C7176F061>I<923801FFFE033FEBFF +F84AB7FC020F16E0023F16F84A16FE49B97E49DA003F80010F01F0010714F04901800101 +804948C880D97FF86F7F02E081496C834801FC6F148014FF486E6E14C08181481AE081A9 +6C5C1BC06C4A5C6C5C6D90C815806D5AD90FF85D90CA150062606295B55A4D5C624D5C4D +5C4D91C7FC4D13FC4D5B4CB512E0047F1480037FB548C8FC92B612F818C018F8F0FF806F +15F092C7003F13FC050713FF050114C071807213F8727F727F867214801BC07214E01BF0 +A27214F81BFCA37214FEA31BFFEBFF80000313E0487F001F13FC487FA2487FA2B67EA31B +FEA3601BFCA292C8FC6C1AF84A5D4A18F06C494B14E05C6C01C04B14C06C90C915804E14 +006C6D4B5B6C01F092B55A6C01FC4A5C27007FFFC001075C6D01FE013F14C0010F90B85A +6D4DC7FC010117F8D9003F16E0020F93C8FC020015F0030749C9FC507378F061>II<96267FFFE01670063FB6ED01F80503B700 +F01403053F04FC14074CB96C130F040706E0131F043F72133F93BA00FC137F0303DC0007 +6D13FF030F03C09039003FFF814B02FCC8000713C3037F02E0030113F792B600806F6CB5 +FC02034ACA121F4A02F8834A02E0834A4A1701027F4A8391B548CC7E494A85495C4C8549 +88494A85494A85495C8A4991CDFC90B54886A2484A1B7FA2481E3F5D481E1F5D5A1F0FA2 +485CA3481E075DA2F703F0489BC7FCA45DA2B6FCB27EA281A47EA2F703F06FF307F87EA3 +6C80A21F0F7E6F1CF07E6F1B1F7E20E06C6E1B3F816DF57FC06D80F7FF806D806D6E4F13 +006D6E616D525A826D6E4F5A6D6E4F5A6E6D6C4E5A021F6EF0FFE06E6E4D5B6E02F84D5B +6E02FE050F90C7FC02006E6CEE3FFE6F02F0EEFFFC031F02FE03035B6FDAFFC0021F13E0 +030303FF0103B55A030093B7C8FC043F18FC040718F0040118C0DC003F94C9FC050316F8 +DD003F1580DE007F01F0CAFC757A75F78C>67 D<92383FFFF80207B612E0027F15FC49B8 +7E010717E0011F83499026F0007F13FC4948C7000F7F90B502036D7E486E6D806F6D8072 +7F486E6E7F8486727FA28684A26C5C72806C5C6D90C8FC6D5AEB0FF8EB03E090CAFCA705 +07B6FC041FB7FC0303B8FC157F0203B9FC021FECFE0391B612800103ECF800010F14C049 +91C7FC017F13FC90B512F04814C0485C4891C8FC485B5A485B5C5A5CA2B5FC5CA360A36E +5DA26C5F6E5D187E6C6D846E4A48806C6D4A4814FC6C6ED90FF0ECFFFC6C02E090263FE0 +7F14FE00019139FC03FFC06C91B6487E013F4B487E010F4B1307010303F01301D9003F02 +80D9003F13FC020101F8CBFC57507ACE5E>97 D<93387FFF80030FB512FC037FECFF804A +B712E0020716F8021F16FE027FD9F8077F49B5D8C000804991C7003F13E04901FC020F7F +49496E7F49498049496E7F49496E7F90B55A48727E92C914804884485B1BC048841BE048 +5BA27313F05AA25C5AA21BF885A2B5FCA391BAFCA41BF002F8CCFCA67EA3807EA47E806C +F103F0F207F86C7F1A0F6C6E17F06C191F6F17E06C6E163F6D6DEE7FC06D6D16FF6D6D4B +13806D6D4B13006D6D6CEC0FFE6D02E0EC3FFC6D02F8ECFFF86D9126FFC00F5B023F91B6 +5A020F178002034CC7FC020016F8031F15E0030392C8FCDB000F13E04D507BCE58>101 +D<903801FFFCB6FCA8C67E131F7FB3AD95380FFFE095B512FE05036E7E050F15E0053F15 +F84D81932701FFF01F7F4CD900077FDC07FC6D80DC0FF06D80DC1FC07F4C48824CC8FC04 +7E6F7F5EEDFDF85E03FF707F5EA25EA25EA293C9FCA45DB3B3A6B8D8E003B81280A86178 +79F76C>104 D<902601FFFCEC7FFEB6020FB512F0057F14FE4CB712C0040716F0041F82 +047F16FE93B5C66C7F92B500F0010F14C0C66C0380010380011F4AC76C806D4A6E8004F0 +6F7F4C6F7F4C6F7F4C8193C915804B7014C0861DE0A27414F0A27414F8A47513FCA57513 +FEAF5113FCA598B512F8A31DF0621DE0621DC0621D806F5E701800704B5B505B704B5B70 +92B55A04FC4A5C704A5C706C010F5C05E0013F49C7FC9227FE7FFC01B55A70B712F0040F +16C0040393C8FC040015F8053F14C0050301F0C9FC94CCFCB3A6B812E0A85F6F7ACD6C> +112 D<902601FFF8EB07FEB691383FFFC094B512F00403804C14FE4C8093261FFC3F1380 +93263FE07F13C0DC7F80B5FCC66C5D011FDAFE0114E06DEBF9FC16F815FB16F016E015FF +16C07114C05E72138095381FFE0093C76C5AF001E095C8FCA25DA65DB3B3A2B812F8A843 +4E7ACD4F>114 D<15FFA75CA55CA45CA25CA25CA25CA25C91B5FCA25B5B5B131F5B90B9 +FC120FBAFCA6D8000791C9FCB3B3A3F01FE0AE183F7014C07F187F7014806D16FF826D4B +13006E6D485AEEFE0F6E90B55A020F5D6E5D020115C06E6C5C031F49C7FC030113F03B6E +7CEC4B>116 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fl cmr10 10 79 +/Fl 79 124 df11 +DI14 D<001C131C007F137F39FF80FF80A26D13C0A3 +007F137F001C131C00001300A40001130101801380A20003130301001300485B00061306 +000E130E485B485B485B006013601A197DB92A>34 D<017C166048B416F02607C3801401 +260F81C01403D900E04A5A001E01784A5A003E6D141F003C013FEC7F80007C90271BE003 +FFC7FC0218B512BF007891381FFC3E00F8011CC75A020C14FC5F4C5A16035F4C5A160F5F +4CC8FC021C5B00780118133E007C5D16FC003C01385B003E90383001F0001EEB70036C01 +E05B903981C007C03907C3800F2601FF005BD8007C49C9FC90C748EB07C0033EEB1FF04B +EB3C3803FCEBF81C4B497E913A01F001E00602030103130703E0497E912607C007148002 +0F15011580DA1F00018013C04A010F1300143E5C14FC5C495A13035C495A130F4A010713 +0149C701C013805B013E1603490203140001FC6F5A49020113064848913800F00E000370 +5A49ED3C3849ED1FF06C48ED07C03A437BBD45>37 D<121C127FEAFF80A213C0A3127F12 +1C1200A412011380A2120313005A1206120E5A5A5A12600A1979B917>39 +D<146014E0EB01C0EB0380EB0700130E131E5B5BA25B485AA2485AA212075B120F90C7FC +A25A121EA2123EA35AA65AB2127CA67EA3121EA2121F7EA27F12077F1203A26C7EA26C7E +1378A27F7F130E7FEB0380EB01C0EB00E01460135278BD20>I<12C07E12707E7E7E120F +6C7E6C7EA26C7E6C7EA21378A2137C133C133E131EA2131F7FA21480A3EB07C0A6EB03E0 +B2EB07C0A6EB0F80A31400A25B131EA2133E133C137C1378A25BA2485A485AA2485A48C7 +FC120E5A5A5A5A5A13527CBD20>I<15301578B3A6007FB812F8B912FCA26C17F8C80078 +C8FCB3A6153036367BAF41>43 D<121C127FEAFF80A213C0A3127F121C1200A412011380 +A2120313005A1206120E5A5A5A12600A19798817>II<121C127F +EAFF80A5EA7F00121C0909798817>I48 DIII<1538A2157815F8A2140114031407A2140F141F141B1433147314 +6314C313011483EB030313071306130C131C131813301370136013C01201EA038013005A +120E120C5A123812305A12E0B712F8A3C73803F800AB4A7E0103B512F8A325397EB82A> +I<0006140CD80780133C9038F003F890B5FC5D5D158092C7FC14FC38067FE090C9FCABEB +07F8EB3FFE9038780F803907E007E090388003F0496C7E12066E7EC87EA28181A21680A4 +123E127F487EA490C71300485C12E000605C12700030495A00385C6C1303001E495A6C6C +485A3907E03F800001B5C7FC38007FFCEB1FE0213A7CB72A>II<12301238123E003FB612E0A316C05A168016000070C712060060140E5D1518 +00E01438485C5D5DC712014A5A92C7FC5C140E140C141C5CA25CA214F0495AA21303A25C +1307A2130FA3495AA3133FA5137FA96DC8FC131E233B7BB82A>III<121C127FEAFF80A5EA7F00121CC7FCB2121C127FEAFF +80A5EA7F00121C092479A317>I<121C127FEAFF80A5EA7F00121CC7FCB2121C127F5A13 +80A4127F121D1201A412031300A25A1206A2120E5A121812385A1260093479A317>I<00 +7FB812F8B912FCA26C17F8CCFCAE007FB812F8B912FCA26C17F836167B9F41>61 +D<1538A3157CA315FEA34A7EA34A6C7EA202077FEC063FA2020E7FEC0C1FA2021C7FEC18 +0FA202387FEC3007A202707FEC6003A202C07F1501A2D901807F81A249C77F167FA20106 +810107B6FCA24981010CC7121FA2496E7EA3496E7EA3496E7EA213E0707E1201486C81D8 +0FFC02071380B56C90B512FEA3373C7DBB3E>65 DI<913A01FF8001 +80020FEBE003027F13F8903A01FF807E07903A03FC000F0FD90FF0EB039F4948EB01DFD9 +3F80EB00FF49C8127F01FE153F12014848151F4848150FA248481507A2485A1703123F5B +007F1601A35B00FF93C7FCAD127F6DED0180A3123F7F001F160318006C7E5F6C7E17066C +6C150E6C6C5D00001618017F15386D6C5CD91FE05C6D6CEB03C0D903FCEB0F80902701FF +803FC7FC9039007FFFFC020F13F002011380313D7BBA3C>IIIIIII75 DIIIII82 DI<003FB812E0A3D9C003EB +001F273E0001FE130348EE01F00078160000701770A300601730A400E01738481718A4C7 +1600B3B0913807FF80011FB612E0A335397DB83C>IIII<007FB590383FFF +FCA3C601F801071380D97FE0D903FCC7FC013FEC01F06D6C5C5F6D6C5C6D6C13034CC8FC +6D6C1306160E6D6C5B6DEB8018163891387FC0306E6C5A16E06E6C5A91380FF18015FB6E +B4C9FC5D14036E7EA26E7F6F7EA24B7E15DF9138019FF09138038FF8150F91380607FC91 +380E03FE140C4A6C7EEC38000230804A6D7E14E04A6D7E49486D7E130391C76C7E01066E +7E130E010C6E7E011C1401013C8101FE822607FF80010713E0B500E0013FEBFF80A33939 +7EB83E>II<003FB7FCA39039FC0001 +FE01C0130349495A003EC7FC003C4A5A5E0038141F00784A5A12704B5A5E006014FF4A90 +C7FCA24A5A5DC712074A5AA24A5A5D143F4A5AA24A5A92C8FC5B495AA2495A5C130F4948 +EB0180A2495A5C137F495A16034890C7FC5B1203485AEE0700485A495C001F5D48485C5E +4848495A49130FB8FCA329397BB833>II<3901800180000313033907000700000E130E485B001813180038133800301330 +0070137000601360A200E013E0485BA400CE13CE39FF80FF806D13C0A3007F137FA2393F +803F80390E000E001A1974B92A>I +I97 DIIII<147E903803FF8090380FC1E0EB1F8790 +383F0FF0137EA213FCA23901F803C091C7FCADB512FCA3D801F8C7FCB3AB487E387FFFF8 +A31C3B7FBA19>IIII< +EB01C0EB07F0EB0FF8A5EB07F0EB01C090C7FCAAEB01F813FFA313071301B3B3A2123C12 +7E00FF13F01303A214E038FE07C0127C383C0F00EA0FFEEA03F8154984B719>III<2703F00FF0EB1FE000FFD93FFCEB7FF8913AF03F01E0 +7E903BF1C01F83803F3D0FF3800FC7001F802603F70013CE01FE14DC49D907F8EB0FC0A2 +495CA3495CB3A3486C496CEB1FE0B500C1B50083B5FCA340257EA445>I<3903F00FF000 +FFEB3FFCECF03F9039F1C01F803A0FF3800FC03803F70013FE496D7EA25BA35BB3A3486C +497EB500C1B51280A329257EA42E>II<3903F01FE000FFEB7FF89038 +F1E07E9039F3801F803A0FF7000FC0D803FEEB07E049EB03F04914F849130116FC150016 +FEA3167FAA16FEA3ED01FCA26DEB03F816F06D13076DEB0FE001F614C09039F7803F0090 +38F1E07E9038F0FFF8EC1FC091C8FCAB487EB512C0A328357EA42E>II<3807E01F00 +FFEB7FC09038E1E3E09038E387F0380FE707EA03E613EE9038EC03E09038FC0080491300 +A45BB3A2487EB512F0A31C257EA421>II<1318A51338A31378A313F8120112031207001FB5FCB6FC +A2D801F8C7FCB215C0A93800FC011580EB7C03017E13006D5AEB0FFEEB01F81A347FB220 +>IIIIII<003FB512FCA2EB8003D83E0013F8003CEB07F00038EB0FE012300070EB1FC0 +EC3F800060137F150014FE495AA2C6485A495AA2495A495A495AA290387F000613FEA248 +5A485A0007140E5B4848130C4848131CA24848133C48C7127C48EB03FC90B5FCA21F247E +A325>II E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fm cmbx10 10 63 +/Fm 63 123 df34 +D39 +D46 +D<49B4FC010F13E0017F13FC9038FF83FE4848C67E4848EB7F804848EB3FC04848EB1FE0 +A2001F15F0A24848EB0FF8A3007F15FCA500FF15FEB3007F15FCA4003F15F8A26D131F00 +1F15F0A2000F15E06D133F000715C06C6CEB7F806C6CEBFF003900FF83FE6DB45A011F13 +F0010190C7FC27387CB630>48 D<141E143E14FE1307133FB5FCA313CFEA000FB3B3A600 +7FB61280A4213779B630>IIII<001C15C0D81F80130701F8137F90B61280A216005D5D15F05D15804AC7FC14F0 +90C9FCA8EB07FE90383FFFE090B512F89038FC07FC9038E003FFD98001138090C713C012 +0EC813E0157F16F0A216F8A21206EA3F80EA7FE012FF7FA44914F0A26C4813FF90C713E0 +007C15C06C5B6C491380D9C0071300390FF01FFE6CB512F8000114E06C6C1380D90FF8C7 +FC25387BB630>I<123C123EEA3FE090B71280A41700485D5E5E5EA25E007CC7EA0FC000 +784A5A4BC7FC00F8147E48147C15FC4A5A4A5AC7485A5D140F4A5A143F92C8FC5C147E14 +FE1301A2495AA31307A2130F5CA2131FA5133FA96D5A6D5A6D5A293A7BB830>55 +D<49B47E010F13F0013F13FC9038FE01FF3A01F8007F804848EB3FC04848EB1FE0150F48 +5AED07F0121FA27FA27F7F01FEEB0FE0EBFF809138E01FC06CEBF03F02FC13809138FF7F +006C14FC6C5C7E6C14FE6D7F6D14C04914E048B612F0EA07F848486C13F8261FE01F13FC +383FC007EB8001007F6D13FE90C7123F48140F48140715031501A21500A216FC7E6C1401 +6D14F86C6CEB03F06D13076C6CEB0FE0D80FFEEB7FC00003B61200C614FC013F13F00103 +138027387CB630>II<007FB912FCBA12FEA36C18FCCDFCAF00 +7FB912FCBA12FEA36C18FC3F197BA04A>61 D65 DIII +II73 D75 DIIIII82 DI<003FB91280A4D9F800EBF003D87FC09238007FC049161F007EC7150FA2007C1707 +A200781703A400F818E0481701A4C892C7FCB3AE010FB7FCA43B387DB742>IIII<007FB5D8F803B512F8A4C66C48C7D80FF0C7FC6D6C5D6D5E6F495A6D6D49C8FC7F +6D6D137E6F5B6DEBF8016D5D6F485A6E6C485A023F130FDA1FFF5BEE9F806E01FFC9FC80 +5E6E5B6E5B80826F7E153F826F7F5D4B7F92B57EA2DA01F97FDA03F17F03F07F913807E0 +7FDA0FC07F021F6D7E4B7E4A486C7F027E8102FE6D7F4A7F49488149486D7F0107804A6E +7E49488149486E7E013F81017F83B60107B61280A441397DB848>II91 D<0160130301E05B0003141F49131E48485B48C7 +5A001E5CA248495A00385C0078130300705CA300F013074891C7FCD8E7C0133ED8FFF0EB +FF8001F814C0A201FC14E0A3007F7FA26C486C13C0A26C486C1380D807C0EB3E00231D75 +B932>II97 +D<13FFB5FCA412077EAF4AB47E020F13F0023F13FC9138FE03FFDAF00013804AEB7FC002 +80EB3FE091C713F0EE1FF8A217FC160FA217FEAA17FCA3EE1FF8A217F06E133F6EEB7FE0 +6E14C0903AFDF001FF80903AF8FC07FE009039F03FFFF8D9E00F13E0D9C00390C7FC2F3A +7EB935>I<903801FFC0010F13FC017F13FFD9FF8013802603FE0013C048485AEA0FF812 +1F13F0123F6E13804848EB7F00151C92C7FC12FFA9127FA27F123FED01E06C7E15036C6C +EB07C06C6C14806C6C131FC69038C07E006DB45A010F13F00101138023257DA42A>II<903803FF8001 +1F13F0017F13FC3901FF83FE3A03FE007F804848133F484814C0001FEC1FE05B003FEC0F +F0A2485A16F8150712FFA290B6FCA301E0C8FCA4127FA36C7E1678121F6C6C14F86D14F0 +00071403D801FFEB0FE06C9038C07FC06DB51200010F13FC010113E025257DA42C>II<161FD907FE +EBFFC090387FFFE348B6EAEFE02607FE07138F260FF801131F48486C138F003F15CF4990 +387FC7C0EEC000007F81A6003F5DA26D13FF001F5D6C6C4890C7FC3907FE07FE48B512F8 +6D13E0261E07FEC8FC90CAFCA2123E123F7F6C7E90B512F8EDFF8016E06C15F86C816C81 +5A001F81393FC0000F48C8138048157F5A163FA36C157F6C16006D5C6C6C495AD81FF0EB +07FCD807FEEB3FF00001B612C06C6C91C7FC010713F02B377DA530>I<13FFB5FCA41207 +7EAFED7FC0913803FFF8020F13FE91381F03FFDA3C01138014784A7E4A14C05CA25CA291 +C7FCB3A3B5D8FC3F13FFA4303A7DB935>II<141FEC7FC0ECFFE0A249 +13F0A56D13E0A2EC7FC0EC1F0091C7FCA9EC0FF0EB0FFFA4EB007F143FB3B0121FEA3F80 +EA7FC0EAFFE0EC7FE0A215C014FF6C481380903883FE006CB45A000F13F0000113801C4B +86BA1D>I<13FFB5FCA412077EAF92380FFFE0A4923803FC0016F0ED0FE0ED1F804BC7FC +157E5DEC03F8EC07E04A5A141FEC7FE04A7E8181A2ECCFFEEC0FFF496C7F806E7F6E7F82 +157F6F7E6F7E82150F82B5D8F83F13F8A42D3A7EB932>I<13FFB5FCA412077EB3B3ACB5 +12FCA4163A7DB91B>I<01FED97FE0EB0FFC00FF902601FFFC90383FFF80020701FF90B5 +12E0DA1F81903983F03FF0DA3C00903887801F000749DACF007F00034914DE6D48D97FFC +6D7E4A5CA24A5CA291C75BB3A3B5D8FC1FB50083B512F0A44C257DA451>I<01FEEB7FC0 +00FF903803FFF8020F13FE91381F03FFDA3C011380000713780003497E6D4814C05CA25C +A291C7FCB3A3B5D8FC3F13FFA430257DA435>I<903801FFC0010F13F8017F13FFD9FF80 +7F3A03FE003FE048486D7E48486D7E48486D7EA2003F81491303007F81A300FF1680A900 +7F1600A3003F5D6D1307001F5DA26C6C495A6C6C495A6C6C495A6C6C6CB45A6C6CB5C7FC +011F13FC010113C029257DA430>I<9039FF01FF80B5000F13F0023F13FC9138FE07FFDA +F00113800007496C13C06C0180EB7FE091C713F0EE3FF8A2EE1FFCA3EE0FFEAA17FC161F +A217F8163F17F06E137F6E14E06EEBFFC0DAF00313809139FC07FE0091383FFFF8020F13 +E0020390C7FC91C9FCACB512FCA42F357EA435>I<49B4EB0780010FEBE00F013FEBF81F +9039FFC07C3F0003EB803E3A07FE000F7F4848EB07FF121F497F123F497F127FA25B12FF +AA6C7EA36C7E5D6C7E000F5C6C6C5B6C6C133F6CEBC0FD39007FFFF1011F13C101011301 +90C7FCAC037F13FEA42F357DA432>I<9038FE03F000FFEB0FFEEC3FFF91387C7F809138 +F8FFC000075B6C6C5A5CA29138807F80ED3F00150C92C7FC91C8FCB3A2B512FEA422257E +A427>I<90383FF0383903FFFEF8000F13FF381FC00F383F0003007E1301007C130012FC +15787E7E6D130013FCEBFFE06C13FCECFF806C14C06C14F06C14F81203C614FC131F9038 +007FFE140700F0130114007E157E7E157C6C14FC6C14F8EB80019038F007F090B512C000 +F8140038E01FF81F257DA426>I<130FA55BA45BA25B5BA25A1207001FEBFFE0B6FCA300 +0390C7FCB21578A815F86CEB80F014816CEBC3E090383FFFC06D1380903803FE001D357E +B425>I<01FFEC3FC0B5EB3FFFA4000714016C80B3A35DA25DA26C5C6E4813E06CD9C03E +13FF90387FFFFC011F13F00103138030257DA435>IIIII<003FB612C0A3D9F0031380EB800749481300003E5C003C49 +5A007C133F5D0078495A14FF5D495B5BC6485B92C7FC495A131F5C495A017FEB03C0EBFF +F014E04813C05AEC80074813005A49EB0F80485A003F141F4848133F9038F001FFB7FCA3 +22257DA42A>I E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fn cmbx12 24.88 28 +/Fn 28 122 df[97 137 119 262 116 48 D[<4CB5FC047F14F80307B7FC031F16C0037F16F04A +B812FC020717FF021F18C04ADAE0078091B548C7804902F0023F7F494A6E7F494A6E7F49 +91C86C7F49718049496F8049496F8090B55A48737F4B84488548874B824887A24887A274 +14805A4B19C0A25A1DE0A31DF0B6FCA37414F8A61DFCA7626C1CFEA56C61A36C8062A27E +627EA26C6E93B6FC7E616C6EED03F77F6D6DED07E76D6D150F6D6DED1FC76D6E143F6D6E +EC7F876D6E903801FF076D02F8D907FE15FC023F9039FF803FFC6E91B512F8020716F002 +0116E06E6C1580031F1500030314F8DB000F01C016F893C9FC62A21DF0A41DE0A35014C0 +EB0FFE90383FFF80496D198090B57E486E190062486E60A2486E4C5BA26497B5FC64A24F +5C4F5C5D4F5C6C4A4B91C7FC634B4B5B6C4A4B5B4B4B5B6C49C9B55A02E003035CD97FF8 +4B5C6E031F91C8FC6DB4037F5B6D01E049B512F86D01FF011F5C6D91B712C06D95C9FC01 +0017FC023F16F0020F16C002034BCAFCDA007F14E0030301FCCBFC>95 +137 118 262 116 57 D[158 +145 120 272 175 65 D[143 142 120 +269 165 I[<0803B500C0EE01F00703B600FEEE03F8077FDBFFE015070607B800FC150F +063F05FF151F4DBA00E0143F050F07F8147F053F07FE14FF94BC5B04039326F8000FECC0 +03040F4BC86CEBF007043F03C0030F6D5A93B648C900036D5A4B03F09339007FFF3F0307 +03C0051F90B5FC4B92CB7E033F02FC18034B02F08492B648844A0380193F4A92CD7E4A4A +864A4A864A02F0864A4A864A8991B65A494B874992CF7E4C885B494A885E498B494A88A2 +495C8D90B65A8D5A5E48217FA24892D1FC223FA25A5DA248211FA3485CFA0FF09FC7FCA2 +5AA45DA3B6FCB27EA381A47EA46C80FA07F0FA0FF87EA2817EA36C6F1D1F23F07E827E22 +3F6D6E1EE0A26D6E1D7F23C06D6E1DFF7F705213806D806D55130070646D6F646D6F515A +6E6E1B1F6E6E515A6E6E515A6E6E1BFF6E6E505B6E6E505B6E6F4F5B6E03E04F90C7FC6F +6EF13FFE6F02FC4F5A030F02FF4E485A6F03C005075B030103F0051F5B6F03FE057F1380 +043FDAFFE00303B5C8FC040F03FE033F13FC0403DBFFF80107B55A040093B812E0053F1A +80050F4FC9FC050119F8DD003F18C0060795CAFCDE007F16F0070393CBFCDF000314C0> +141 146 115 271 168 I[156 142 120 269 178 I[127 +141 120 268 146 70 D[121 142 120 269 +140 76 D[203 142 120 269 220 I[137 +142 120 269 159 80 D[163 +144 120 269 173 82 D[<93260FFFF8163E4BB600E0153F031F03FE5D037FDBFFC05C02 +03B800F05B020F05FC5B4A05FF5B027FF0C00F91B526FE000FECF01F010302C0D9007F6D +5A4991C800076D5A4901FC030090B6FC4901F0163F4949160F4901808290B5170192CBFC +4849844849181F87484984A2484984874886A248498588A24887A388A2B58680A36E85A2 +80A26E8580A2818103F0725A6C6E96C7FC15FE8116E06C15FEEEFFE017FF6C17F0F0FF80 +6C18F8F1FFC06C19FCF2FF806C1AE01BF86C1AFE6C747E6D1AE0886D866D866D1AFF6D87 +6D87010087806E86020F86020386020086153F030F851501DB001F19801601DC000F18C0 +EF007F060717E0F0003F070316F0F1003F1A0F080315F81A00871B1F877514FCA287007F +86486C85A288A388A36D86A31EF87FA37F1EF0A26D626D1CE0A27F6D5013C0A26E1B806E +96B5FC6E1B0002F8606E4E5B6E626E6C5F03E04D5B03F84D5B03FE057F5BDBFFC093B55A +04F803035C496CD9FF80021F91C7FCD9FC1F02FF49B55AD9F80792B75A496C19F049C661 +49011F18804901074DC8FC90C817F848031F16C048030003FCC9FC007C04011480>102 +146 115 271 129 I[<000FC312F8A6488EA304C0C7001F4AC7120103F8C8F0000F03C0 +1C0192C9737E02FC1E1F4A1E0702E08A4A8A4A8A4890CA757EA249203F49201FA349200F +A2492007A4492003007F8EA4498CA848487A1380A6CC99C7FCB3B3B3B3AA030FBD12FCA9 +>145 140 120 267 162 I<93B512FC037FECFFF00207B8FC023F17E091B912F84918FE +0107727E499126C0007F14E04901E0C7000F80496D020380496D020014FE6F6F7F90B570 +806F6F8085486E6F807380A27380A28885886C5CA26D4982886D5B6D5B010713C0010190 +CAFC90CCFCA90603B7FC050FB8FC0403B9FC167F0307BAFC153F4AB7EA807F020FEDE000 +023F02FCC7FC91B612E0010392C8FC4914FC011F14F04914C0495C90B548C9FC485C485C +485C485C5A5D485CA24891CAFCA3B6FC5CA397B6FCA461806C60F107EF6C6E150F6F16CF +6C183F6FDB7F8F806C6EDBFF0F14E06C02FCDA03FE15FE6C6E91260FFC0791B5FC6C6E6C +D93FF817806C923AF803FFF003013F91B6487E010FEF8000010394C77E010004FC141F02 +1F03F0140702010380DA007F1400DA000701F8CDFC695F79DD71>97 +D[113 +144 121 270 129 I<94387FFFF0041FB612E093B712FE0307707E031F17F092B97E4A18 +FE020784021F9126F8000F14804A0280010014C04A49C74814E049B500F85C494A17F049 +4A5C495C494A4A14F84991C8FC5D495B90B5FC5D5A485C7314F05A4B6F14E05A7314C048 +7214804B93383FFE00F20FF84896C8FCA4485CA5B6FCB07EA281A37EA36C80A37E6F18FE +6CF201FFA26C6E5F1CFE6C801B076C6EEF0FFC6D7F70EE1FF86DF13FF06D6E167F6D6EEE +FFE06D02F84B13C06D6E5D6D02FF030F13806D03C0023F1300023F02F0903801FFFC6E91 +26FF801F5B020792B65A6E18C0020060033F4CC7FC030716F8030016C0041F4AC8FCDC00 +7F13C0585F78DD67>I[113 +144 120 270 129 I<94387FFFC0040FB6FC93B712E0030716FC031F16FF037F17C04AB9 +12F00207DAF80380021F912680003F13FE4A49C7000F7F4A01F802038049B5486E804902 +C06E6C7F494A6F7F4991C9FC49727F4949707F4B84498490B548707F5A4B198048855D48 +1CC086481CE05D5A871DF05AA25D5AA21DF887A2B6FCA392BBFCA51DF00380CDFCA77EA4 +817EA37EA2817EA26CF307F06FF00FF87E816C1B1F6F19F06C1B3F6D6DF07FE06D7FF4FF +C06D6E4C13806D6E5E6D02F04C13006D6EEE1FFE6D6E4C5A6D6C01FFEEFFF86E02E00203 +5B6E02FC021F5B02079126FFC003B55A6E92B7C7FC020060033F17F8030F17E003011780 +DB003F03FCC8FC040315C0DC000F01F8C9FC5D5F7ADD6A>I[114 143 119 270 129 104 D[49 +144 119 271 65 I[50 143 119 270 65 108 D110 +D<94381FFFF00407B612C0047F15FC0303B87E030F17E0037F17FC4ABAFC4A9126FC007F +80020F02C0010714E04A49C880027F01F8033F13FC91B5486F7F4902C003077F494A6F80 +4991C96C80494970804949717F49874949717FA290B548717F48884B83481D80A2481DC0 +4B83481DE0A2481DF0A3484A7114F8A4481DFCA5B61BFEAF6C1DFCA56C6E4D14F8A36C1D +F0A36C1DE06F5F6C1DC0A26C6E4D1480A26C1D006F5F6C646D6D4D5B6F94B5FC6D636D6D +4C5C6D6E4B5C6D6E4B5C6D02F0031F5C6D6E4B91C7FC6D6C01FE92B512FC6ED9FFC00107 +5C6E02FC017F5C020791B812C0020196C8FC6E6C17FC031F17F003031780DB007F03FCC9 +FC040715C0DC001F01F0CAFC675F7ADD74>I114 +D<92261FFFF814F80203B638C001FC023FEDFC0791B8121F010317FF130F013F9038F800 +1F4990C8FCD9FFF8153F4801E0150F484915034849814890CAFC197F4848173F191F485A +A2007F180FA31907487EA27FA28002E0705A6E93C8FC14FC14FF15F06CECFF8016FCEEFF +F06CEEFF8018F06C17FE727E6C18E0856C18FC6C846C727E6C856D84011F846D84130301 +0084023F83140F020183EC001FDB007F16801603DC000F15C01700183F060F14E0007F17 +03486C82727E857F85857FA2857F1BC07FA27F1B806D5F7F1B006E5E6E5F6E163F6E4C5A +02FC4C5A6E03035B6E6C4A5B03F0023F5B03FF0107B55A01F991B7C7FCD9F07F16FCD9E0 +1F16F0D9800716C0D9000193C8FC48D9003F14F8007C020349C9FC4B5F78DD5C>I[72 132 124 +258 90 II<007FB800C04AB71280 +A9D800034ACA000791C7FC6D080013F0775A6D6E4E5AA26E6E6064836E4F90C8FC836E4F +5A836E4F5AA26E6E4C5AA26E6E5F1C3F6E6E5F1C7F836E4F5A846F4D5B846F4D90C9FCA2 +6F6E4A5AA26F6E5D1B0F846F4D5A846F4D5A846F4D5AA26F6E4A5AA2706E5C627002C091 +CAFC6219E0704B5A19F0704B5AA2706E485AA2706E485AA27002FE5B1A7F19FF704B5AA2 +715DA27192CBFCA2715CA2715CA3715CA2715CA2715CA2715CA2725BA27290CCFCA3725A +A2725AA24E5AA24E5AA261187FA24E5AA24D5B13FE2603FF804A90CDFC000F13E0486D4A +5A487F486D4A5AA260B56C141F4D5AA24D5A17FF604C5B4A4990CEFC6C5D4C5A6C49EB3F +FC4A495A6C4948485A9026FE80075B270FFFC03F5B6C90B6CFFC6C5D6C15F86C6C5C011F +14C0010749D0FC9038007FE071857CDB7B>121 D E +%EndDVIPSBitmapFont +%DVIPSBitmapFont: Fo cmr17 17.28 24 +/Fo 24 118 df<120FEA3FC0EA7FE0EAFFF0A6EA7FE0EA3FC0EA0F000C0C748B24>46 +D<4AB4FC021F13F0027F13FC903901FF01FF903A03F8003F80D90FE0EB0FE049486D7E49 +486D7E49C76C7E017E140049157E0001167F49810003178049151F000717C049150F000F +17E0A3001F17F0491507A2003F17F8A54848ED03FCA700FF17FEB3A8007F17FCA56D1507 +A3003F17F8A4001F17F06D150FA2000F17E0A36C6CED1FC0A2000317806D153F00011700 +6D5D6C6C15FE017E5D017F14016D6C495A6D6C495A6D6C495AD903F8EB3F806DB448B4C7 +FC9039007FFFFC021F13F0020190C8FC37607BDD42>48 DI<4AB47E021F13F0027F13FC49B6FC902607FE007FD90FF0EB3FC04948EB +0FE049486D7E49486D7E49C76C7E484881484814004848157F18804848153F001F17C0A2 +4848151F18E0A2007F17F049150FA300FF17F8A418FC1707A618FEA2170F127FA46C7E17 +1FA2121F6D153F000F163717776C6C15676C6C15E70001ED01C77F6C6CEC03876D6CEB07 +076D6C130ED90FE0133CD907F801F813FC0101B512F06D14C0021F130091C8120FA218F8 +A418F0171FA218E0A2173F18C0A2EF7F80EA07F0486CEDFF00487E4C5A5F16035F4C5A49 +140F6C484A5A01804A5A90C8485A6C6C4AC7FC01E0495A6C6CEB07F86CB4EB3FF06C6CB5 +12C0011F91C8FC010713FC010013E037607BDD42>57 D<170FA34D7EA24D7EA34D7EA34D +7EA34C7F17DFA29338039FFC178FA29338070FFE1707040F7FEE0E03A2041E80EE1C01A2 +043C80EE3800A24C80187FA24C80183FA24B4880181F0303814C130FA203078193C71207 +A24B81030E80A24B8284A24B8284A24B82197F03F0824B153FA20201834B151FA2020383 +92B8FCA24A83A292C91207020E8385A24A8485023C84023882A20278840270177FA202F0 +844A173FA24948841A1FA24948841A0FA249CB7F1A074985865B496C85497E48486C4D7F +000F01F8051F13F0B60407B612F0A45C657DE463>65 DI +76 D80 D83 +D86 D97 DI< +4AB47E020F13F8023F13FE9139FF007F80D903FCEB07E0D907F0EB01F0D91FE0EB007849 +488049488049C87E48485D4915FF00034B138048485CA2485AA2485AA2003F6F130049EC +007C94C7FC127FA35B12FFAD127F7FA4123F7FA2001FEE01C07F000F16036D168012076C +6C15076D160000015E6C6C151E6D6C5C6D6C5C6D6C5CD90FF8495AD903FCEB07C0903A00 +FF803F8091263FFFFEC7FC020F13F80201138032417CBF3A>I101 D103 D<133C13FF487F487FA66C5B6C90C7FC133C90C8FCB3A2EB03C0 +EA07FF127FA41201EA007FA2133FB3B3AC497E497EB612E0A41B5F7DDE23>105 +D108 D110 +DII<90390780 +03F8D807FFEB0FFFB5013F13C092387C0FE0913881F01F9238E03FF00001EB838039007F +8700148FEB3F8E029CEB1FE0EE0FC00298EB030002B890C7FCA214B014F0A25CA55CB3B0 +497EEBFFF8B612FCA42C3F7CBE33>114 D<9139FFE00180010FEBFC03017FEBFF073A01 +FF001FCFD803F8EB03EFD807E0EB01FF48487F4848147F48C8123F003E151F007E150F12 +7CA200FC1507A316037EA27E7F6C7E6D91C7FC13F8EA3FFE381FFFF06CEBFF806C14F86C +14FF6C15C06C6C14F0011F80010714FED9007F7F02031480DA003F13C01503030013E016 +7F00E0ED1FF0160F17F86C15071603A36C1501A37EA26C16F016037E17E06D14076DEC0F +C06D1580D8FDF0141FD8F8F8EC7F00013E14FC3AF01FC00FF80107B512E0D8E001148027 +C0003FF8C7FC2D417DBF34>I<1438A71478A414F8A31301A31303A21307130F131FA213 +7F13FF1203000F90B6FCB8FCA3260007F8C8FCB3AE17E0AE6D6CEB01C0A316036D6C1480 +16076D6C14006E6C5A91383FC01E91381FF07C6EB45A020313E09138007F802B597FD733 +>II +E +%EndDVIPSBitmapFont +end +%%EndProlog +%%BeginSetup +%%Feature: *Resolution 600dpi +TeXDict begin +%%PaperSize: A4 + +%%EndSetup +%%Page: 1 1 +1 0 bop 1379 2031 a Fo(PSBLAS)44 b(90)1297 2214 y(P)l(arallel)i(Sparse) +1119 2397 y(Basic)d(Linear)h(Algebra)1383 2579 y(Subroutines)1405 +2762 y(V)-11 b(ersion)45 b(1.0)p eop +%%Page: 2 2 +2 1 bop 739 1188 a Fn(Con)-6 b(ten)g(ts)739 1706 y Fm(1)76 +b(F90)32 b(Data)h(Structures)1834 b(4)739 1891 y(2)76 +b(F90)32 b(PSBLAS)g(Library)1793 b(8)863 1992 y Fl(F90)p +1006 1992 25 4 v 30 w(PSAXPBY)34 b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g +(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.) +h(.)f(.)h(.)g(.)f(.)132 b(9)863 2093 y(F90)p 1006 2093 +V 30 w(PSDOT)83 b(.)42 b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.) +h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g +(.)f(.)90 b(11)863 2194 y(F90)p 1006 2194 V 30 w(DOT)57 +b(.)42 b(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.) +h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(13)863 2295 y(F90)p 1006 2295 V 30 w(PSAMAX)74 b(.)42 +b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(15)863 2396 y(F90)p 1006 2396 V 30 w(AMAX)48 b(.)41 +b(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f +(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(17)863 2497 y(F90)p 1006 2497 V 30 w(PSASUM)25 b(.)42 +b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(18)863 2598 y(F90)p 1006 2598 V 30 w(PSNRM2)30 b(.)42 +b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(20)863 2699 y(F90)p 1006 2699 V 30 w(PSNRMI)42 b(.)g(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.) +g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 b(22)863 2800 +y(F90)p 1006 2800 V 30 w(PSSPMM)81 b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g +(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.) +h(.)f(.)h(.)g(.)f(.)90 b(23)863 2901 y(F90)p 1006 2901 +V 30 w(PSSPSM)46 b(.)c(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h +(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)90 b(26)863 3002 y(F90)p 1006 3002 V 30 w(PSHALO)30 +b(.)42 b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(30)863 3103 y(F90)p 1006 3103 V 30 w(PSO)n(VRL)33 b(.)42 +b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(32)739 3288 y Fm(3)76 b(T)-8 b(o)s(ols)31 b(Library)2081 +b(34)863 3389 y Fl(P)-7 b(AR)g(TITION)69 b(.)42 b(.)g(.)f(.)h(.)f(.)h +(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 b(35)863 3490 y(F90)p +1006 3490 V 30 w(PSDSCALL)66 b(.)41 b(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.) +h(.)g(.)f(.)90 b(37)863 3591 y(F90)p 1006 3591 V 30 w(PSSP)-7 +b(ALL)74 b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h +(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(38)863 3692 y(F90)p 1006 3692 V 30 w(PSSPINS)30 b(.)42 +b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(39)863 3794 y(F90)p 1006 3794 V 30 w(PSSP)-7 b(ASB)73 +b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.) +h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(42)863 3895 y(F90)p 1006 3895 V 30 w(PSPTINS)81 b(.)42 +b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(44)863 3996 y(F90)p 1006 3996 V 30 w(PSPT)-7 b(ASB)59 +b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.) +h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(46)863 4097 y(F90)p 1006 4097 V 30 w(PSSPFREE)69 b(.)41 +b(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h +(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 b(47)863 +4198 y(F90)p 1006 4198 V 30 w(PSDSCFREE)67 b(.)42 b(.)f(.)h(.)f(.)h(.)g +(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.) +h(.)f(.)h(.)g(.)f(.)90 b(49)863 4299 y(F90)p 1006 4299 +V 30 w(PSDSALL)61 b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h +(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)90 b(50)863 4400 y(F90)p 1006 4400 V 30 w(PSDSINS)24 +b(.)42 b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(52)863 4501 y(F90)p 1006 4501 V 30 w(PSDSASB)60 b(.)42 +b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(55)863 4602 y(F90)p 1006 4602 V 30 w(PSDSFREE)63 b(.)41 +b(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h +(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 b(57)863 +4703 y(F90)p 1006 4703 V 30 w(PSSPREINIT)62 b(.)42 b(.)f(.)h(.)f(.)h(.) +g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)90 b(58)863 4804 y(F90)p 1006 +4804 V 30 w(PSSPUPD)51 b(.)42 b(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.) +g(.)f(.)90 b(59)863 4905 y(F90)p 1006 4905 V 30 w(PSCSRP)47 +b(.)42 b(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.) +f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 +b(62)863 5006 y(F90)p 1006 5006 V 30 w(PSGELP)40 b(.)i(.)g(.)f(.)h(.)f +(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.) +g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)90 b(64)2150 5255 +y(2)p eop +%%Page: 3 3 +3 2 bop 415 523 a Fl(F90)p 558 523 25 4 v 29 w(PSDSCREN)52 +b(.)42 b(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.) +h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 +b(66)415 623 y(F90)p 558 623 V 29 w(PSVERIFY)75 b(.)42 +b(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f +(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 b(68)415 +722 y(PSGLOB)p 764 722 V 29 w(TO)p 918 722 V 29 w(LOC)32 +b(.)41 b(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.) +f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 b(71)415 +822 y(PSLOC)p 700 822 V 29 w(TO)p 854 822 V 29 w(GLOB)32 +b(.)41 b(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.) +f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 b(73)291 +1005 y Fm(4)76 b(Metho)s(d)31 b(Library)1973 b(75)415 +1104 y Fl(PRECONDITIONER)75 b(.)41 b(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.) +g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 +b(76)415 1204 y(F90)p 558 1204 V 29 w(CGS)75 b(.)41 b(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.) +h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 b(78)415 +1303 y(F90)p 558 1303 V 29 w(BICG)96 b(.)42 b(.)g(.)f(.)h(.)f(.)h(.)f +(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.) +h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)90 b(81)415 1403 y(F90)p +558 1403 V 29 w(BICGST)-7 b(AB)70 b(.)42 b(.)f(.)h(.)f(.)h(.)g(.)f(.)h +(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.) +h(.)g(.)f(.)h(.)90 b(84)415 1503 y(F90)p 558 1503 V 29 +w(BICGST)-7 b(ABL)83 b(.)41 b(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h +(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.)f(.)h(.)f(.)h(.)g(.)f(.)h(.) +90 b(87)1702 5255 y(3)p eop +%%Page: 4 4 +4 3 bop 739 1146 a Fk(Chapter)65 b(1)739 1561 y Fn(F90)78 +b(Data)g(Structures)739 1993 y Fl(In)38 b(this)h(c)n(hapter)e(are)g +(illustrated)h(data)f(structures)h(used)g(for)f(de\014nition)i(of)f +(routines)739 2092 y(in)n(terfaces.)58 b(This)35 b(include)g(data)g +(structure)g(for)f(sparse)g(matrix)g(and)h(comm)n(unication)739 +2192 y(informations.)55 b(These)33 b(data)h(structures)f(are)g(used)h +(for)f(calling)g(PSBLAS)h(routines)f(in)739 2291 y(F)-7 +b(ortran)23 b(90)g(language)g(and)h(will)g(b)r(e)h(used)f(to)g(next)h +(c)n(hapters)e(con)n(taining)g(these)h(callings.)739 +2391 y(Their)j(de\014nitions)h(are)e(included)j(in)e(the)h(mo)r(dules)g +Fj(TYPESP)d Fl(and)j Fj(TYPE_PSP)p Fl(.)739 2620 y Fi(Comm)m(unication) +35 b(structure)739 2774 y Fl(Ev)n(ery)c(structure)g(of)i(this)f(t)n(yp) +r(e)h(is)f(asso)r(ciated)f(to)i(a)e(sparse)g(matrix,)j(it)e(con)n +(tains)g(data)739 2873 y(ab)r(out)h(general)f(matrix)g(informations)h +(and)g(elemen)n(ts)g(to)g(b)r(e)g(exc)n(hanged)f(among)g(pro-)739 +2973 y(cesses.)k(This)27 b(structure)g(will)h(b)r(e)g(named)g(in)g +(next)f(c)n(hapters)g(as)g Fh(decomp)p 3085 2973 25 4 +v 29 w(data)p Fl(.)739 3072 y(It)j(is)f(not)g(necessary)f(for)h(the)h +(user)f(to)g(kno)n(w)g(the)g(in)n(ternal)g(structure)g(of)h +Fh(decomp)p 3395 3072 V 29 w(data)p Fl(,)739 3172 y(it)c(is)g(set)g(in) +g(fully-transparen)n(t)e(mo)r(de)i(b)n(y)g(PSBLAS-TOOLS)f(routines)g +(when)h(inserting)739 3272 y(a)h(new)h(sparse)e(matrix,)h(ho)n(w)n(ev)n +(er)f(the)i(de\014nition)g(of)f(the)h(descriptor)e(is)i(the)g(follo)n +(wing.)739 3423 y Fm(matrix)p 1025 3423 29 4 v 34 w(data)42 +b Fl(includes)19 b(general)e(information)g(ab)r(out)i(matrix)e(and)i +(BLA)n(CS)f(grid.)33 b(More)946 3523 y(precisely:)946 +3681 y Fm(matrix)p 1232 3681 V 34 w(data[DEC)p 1680 3681 +V 35 w(TYPE)p 1981 3681 V 35 w(])41 b Fl(Iden)n(ti\014es)25 +b(the)g(decomp)r(osition)f(t)n(yp)r(e)h(\(global\);)1129 +3781 y(the)30 b(actual)f(v)-5 b(alues)29 b(are)f(in)n(ternally)h +(de\014ned,)h(so)f(they)h(should)f(nev)n(er)f(b)r(e)i(ac-)1129 +3881 y(cessed)d(directly)-7 b(.)946 4006 y Fm(matrix)p +1232 4006 V 34 w(data[CTXT)p 1748 4006 V 36 w(])41 b +Fl(Comm)n(unication)18 b(con)n(text)g(as)f(returned)h(b)n(y)g(the)h +(BLA)n(CS)1129 4106 y(\(global\).)946 4231 y Fm(matrix)p +1232 4231 V 34 w(data[M)p 1566 4231 V 35 w(])41 b Fl(T)-7 +b(otal)27 b(n)n(um)n(b)r(er)g(of)h(equations)e(\(global\).)946 +4357 y Fm(matrix)p 1232 4357 V 34 w(data[N)p 1550 4357 +V 35 w(])41 b Fl(T)-7 b(otal)27 b(n)n(um)n(b)r(er)g(of)h(v)-5 +b(ariables)26 b(\(global\).)946 4482 y Fm(matrix)p 1232 +4482 V 34 w(data[N)p 1550 4482 V 35 w(R)m(O)m(W)p 1822 +4482 V 34 w(])41 b Fl(Num)n(b)r(er)25 b(of)f(grid)g(v)-5 +b(ariables)24 b(o)n(wned)g(b)n(y)g(the)h(curren)n(t)1129 +4582 y(pro)r(cess)j(\(lo)r(cal\);)h(equiv)-5 b(alen)n(t)29 +b(to)g(the)g(n)n(um)n(b)r(er)g(of)g(lo)r(cal)f(ro)n(ws)f(in)i(the)h +(sparse)1129 4681 y(co)r(e\016cien)n(t)e(matrix.)946 +4807 y Fm(matrix)p 1232 4807 V 34 w(data[N)p 1550 4807 +V 35 w(COL)p 1783 4807 V 34 w(])41 b Fl(T)-7 b(otal)28 +b(n)n(um)n(b)r(er)g(of)h(grid)e(v)-5 b(ariables)27 b(read)h(b)n(y)g +(the)h(cur-)1129 4907 y(ren)n(t)24 b(pro)r(cess)e(\(lo)r(cal\);)k +(equiv)-5 b(alen)n(t)23 b(to)h(the)g(n)n(um)n(b)r(er)g(of)g(lo)r(cal)f +(columns)h(in)g(the)1129 5006 y(sparse)i(co)r(e\016cien)n(t)i(matrix.) +36 b(They)28 b(include)g(the)g(halo.)2150 5255 y(4)p +eop +%%Page: 5 5 +5 4 bop 498 523 a Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 +b(p)r(oin)n(ter)g(to)g(in)n(teger)g(arra)n(y)e(of)j(dimension)f(10.)291 +687 y Fm(halo)p 471 687 29 4 v 34 w(index)41 b Fl(A)32 +b(list)g(of)g(the)g(halo)f(and)h(b)r(oundary)f(elemen)n(ts)h(for)f(the) +h(curren)n(t)f(pro)r(cess)498 787 y(to)e(b)r(e)g(exc)n(hanged)e(with)j +(other)e(pro)r(cesses;)g(for)g(eac)n(h)g(pro)r(cesses)f(with)i(whic)n +(h)g(it)g(is)498 887 y(necessary)d(to)h(comm)n(unicate:)575 +1051 y(1.)41 b(Pro)r(cess)26 b(iden)n(ti\014er;)575 1182 +y(2.)41 b(Num)n(b)r(er)28 b(of)f(p)r(oin)n(ts)h(to)f(b)r(e)h(receiv)n +(ed;)575 1313 y(3.)41 b(Indices)27 b(of)h(p)r(oin)n(ts)f(to)h(b)r(e)g +(receiv)n(ed;)575 1445 y(4.)41 b(Num)n(b)r(er)28 b(of)f(p)r(oin)n(ts)h +(to)f(b)r(e)h(sen)n(t;)575 1576 y(5.)41 b(Indices)27 +b(of)h(p)r(oin)n(ts)f(to)h(b)r(e)g(sen)n(t;)498 1740 +y(The)e(list)g(ma)n(y)f(con)n(tain)h(an)f(arbitrary)f(n)n(um)n(b)r(er)i +(of)f(groups;)h(its)g(end)g(is)f(mark)n(ed)g(b)n(y)498 +1840 y(a)i(-1.)498 1939 y(Sp)r(eci\014ed)h(as:)37 b(a)27 +b(p)r(oin)n(ter)g(to)g(an)h(in)n(teger)e(arra)n(y)g(of)h(rank)g(one.) +291 2104 y Fm(o)m(vrlap)p 557 2104 V 35 w(index)41 b +Fl(A)27 b(list)g(of)f(the)h(o)n(v)n(erlap)d(elemen)n(ts)j(for)f(the)g +(curren)n(t)g(pro)r(cess,)g(organized)498 2203 y(in)i(groups)e(lik)n(e) +h(the)h(previous)f(v)n(ector:)575 2368 y(1.)41 b(Pro)r(cess)26 +b(iden)n(ti\014er;)575 2499 y(2.)41 b(Num)n(b)r(er)28 +b(of)f(p)r(oin)n(ts)h(to)f(b)r(e)h(receiv)n(ed;)575 2630 +y(3.)41 b(Indices)27 b(of)h(p)r(oin)n(ts)f(to)h(b)r(e)g(receiv)n(ed;) +575 2761 y(4.)41 b(Num)n(b)r(er)28 b(of)f(p)r(oin)n(ts)h(to)f(b)r(e)h +(sen)n(t;)575 2892 y(5.)41 b(Indices)27 b(of)h(p)r(oin)n(ts)f(to)h(b)r +(e)g(sen)n(t;)498 3057 y(The)e(list)g(ma)n(y)f(con)n(tain)h(an)f +(arbitrary)f(n)n(um)n(b)r(er)i(of)f(groups;)h(its)g(end)g(is)f(mark)n +(ed)g(b)n(y)498 3156 y(a)i(-1.)498 3256 y(Sp)r(eci\014ed)h(as:)37 +b(a)27 b(p)r(oin)n(ter)g(to)g(an)h(in)n(teger)e(arra)n(y)g(of)h(rank)g +(one.)291 3421 y Fm(o)m(vrlap)p 557 3421 V 35 w(index)41 +b Fl(F)-7 b(or)27 b(all)g(o)n(v)n(erlap)f(p)r(oin)n(ts)h(b)r(elonging)g +(to)h(th)g(ecurren)n(t)e(pro)r(cess:)575 3585 y(1.)41 +b(Ov)n(erlap)26 b(p)r(oin)n(t)h(index;)575 3716 y(2.)41 +b(Num)n(b)r(er)28 b(of)f(pro)r(cesses)f(sharing)g(that)i(o)n(v)n(erlap) +e(p)r(oin)n(ts;)498 3880 y(The)g(list)g(ma)n(y)f(con)n(tain)h(an)f +(arbitrary)f(n)n(um)n(b)r(er)i(of)f(groups;)h(its)g(end)g(is)f(mark)n +(ed)g(b)n(y)498 3980 y(a)i(-1.)498 4080 y(Sp)r(eci\014ed)h(as:)37 +b(a)27 b(p)r(oin)n(ter)g(to)g(an)h(in)n(teger)e(arra)n(y)g(of)h(rank)g +(one.)291 4244 y Fm(lo)s(c)p 417 4244 V 33 w(to)p 535 +4244 V 35 w(glob)40 b Fl(eac)n(h)29 b(elemen)n(t)h Fh(i)g +Fl(of)g(this)h(arra)n(y)c(con)n(tains)i(global)g(iden)n(ti\014er)h(of)g +(the)g(lo)r(cal)498 4344 y(v)-5 b(ariable)27 b Fh(i)p +Fl(.)498 4443 y(Sp)r(eci\014ed)h(as:)37 b(a)27 b(p)r(oin)n(ter)g(to)g +(an)h(in)n(teger)e(arra)n(y)g(of)h(rank)g(one.)291 4608 +y Fm(glob)p 473 4608 V 33 w(to)p 591 4608 V 34 w(lo)s(c)41 +b Fl(if)30 b(global)e(v)-5 b(ariable)28 b Fh(i)h Fl(is)g(read)g(b)n(y)g +(curren)n(t)f(pro)r(cess)g(then)i(elemen)n(t)g Fh(i)f +Fl(con-)498 4707 y(tains)c(lo)r(cal)g(index)h(correp)r(onding)e(to)h +(global)f(v)-5 b(ariable)25 b Fh(i)p Fl(;)h(else)f(elemen)n(t)h +Fh(i)f Fl(con)n(tains)498 4807 y Fg(\000)p Fl(\()p Fh(np)12 +b Fl(+)g Fh(p)g Fl(+)g(1\))21 b(where)j Fh(np)g Fl(is)g(the)g(total)g +(n)n(um)n(b)r(er)g(of)g(pro)r(cesses)f(and)h Fh(p)g Fl(is)g(the)h +(index)498 4907 y(of)j(the)g(pro)r(cess)e(o)n(wning)h(global)f(index)i +Fh(i)p Fl(.)498 5006 y(Sp)r(eci\014ed)g(as:)37 b(a)27 +b(p)r(oin)n(ter)g(to)g(an)h(in)n(teger)e(arra)n(y)g(of)h(rank)g(one.) +1702 5255 y(5)p eop +%%Page: 6 6 +6 5 bop 739 523 a Fl(F)n(OR)-7 b(TRAN90)33 b(in)n(terface)g(for)g +Fh(decomp)p 2026 523 25 4 v 30 w(data)h Fl(structures)f(is)g(therefore) +g(de\014ned)h(as)g(fol-)739 623 y(lo)n(ws:)782 790 y +Fj(TYPE)42 b(DECOMP_DATA_TYPE)957 890 y(INTEGER,)e(POINTER)h(::)i +(MATRIX_DATA\(:\))957 989 y(INTEGER,)d(POINTER)h(::)i(HALO_INDEX\(:\)) +957 1089 y(INTEGER,)d(POINTER)h(::)i(OVRLAP_ELEM\(:\))957 +1188 y(INTEGER,)d(POINTER)h(::)i(OVRLAP_INDEX\(:\))957 +1288 y(INTEGER,)d(POINTER)h(::)i(LOC_TO_GLOB\(:\))957 +1388 y(INTEGER,)d(POINTER)h(::)i(GLOB_TO_LOC)c(\(:\))826 +1487 y(END)j(TYPE)g(DECOMP_DATA_TYPE)739 1721 y Fi(Sparse)c(matrix)e +(structure)739 1875 y Fl(Con)n(tains)21 b(all)g(information)g(ab)r(out) +h(lo)r(cal)f(p)r(ortion)g(of)h(the)g(sparse)f(matrix)g(and)g(its)h +(storage)739 1975 y(mo)r(de.)35 b(Man)n(y)20 b(of)h(this)g(\014elds)g +(are)f(set)h(in)g(fully-transparen)n(t)e(mo)r(de)i(b)n(y)g +(PSBLAS-TOOLS)739 2074 y(routines)34 b(when)h(inserting)f(a)g(new)h +(sparse)e(matrix,)k(user)d(m)n(ust)g(set)h(only)g(\014elds)f(whic)n(h) +739 2174 y(describ)r(e)27 b(matrix)g(storage)f(mo)r(de)i(\(see)f +Fg(x)h Fl(3\).)739 2274 y(Fields)g(con)n(tained)f(in)g(Sparse)g(matrix) +g(structures)g(are:)739 2441 y Fm(ASPK)42 b Fl(Con)n(tains)26 +b(v)-5 b(alues)28 b(of)f(the)h(lo)r(cal)f(distributed)h(sparse)e +(matrix.)946 2540 y(Sp)r(eci\014ed)33 b(as:)44 b(a)32 +b(p)r(oin)n(ter)f(to)g(an)h(arra)n(y)d(of)j(rank)f(one)g(of)h(t)n(yp)r +(e)g(corresp)r(onding)d(to)946 2640 y(matrix)f(en)n(tries)e(t)n(yp)r(e) +i(.)739 2807 y Fm(IA1)42 b Fl(Holds)31 b(in)n(teger)f(information)h(on) +g(distributed)h(sparse)e(matrix.)48 b(Actual)32 b(informa-)946 +2907 y(tion)c(will)g(dep)r(end)g(on)g(data)f(format)g(used.)946 +3006 y(Sp)r(eci\014ed)i(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h(an)f(in)n +(teger)g(arra)n(y)e(of)j(rank)e(one.)739 3174 y Fm(IA2)42 +b Fl(Holds)31 b(in)n(teger)f(information)h(on)g(distributed)h(sparse)e +(matrix.)48 b(Actual)32 b(informa-)946 3273 y(tion)c(will)g(dep)r(end)g +(on)g(data)f(format)g(used.)946 3373 y(Sp)r(eci\014ed)i(as:)36 +b(a)27 b(p)r(oin)n(ter)g(to)h(an)f(in)n(teger)g(arra)n(y)e(of)j(rank)e +(one.)739 3540 y Fm(INF)m(O)m(A)42 b Fl(On)24 b(en)n(try)f(can)h(hold)g +(auxiliary)f(information)g(on)h(distributed)h(sparse)e(matrix.)946 +3640 y(Actual)28 b(information)f(will)h(dep)r(end)g(on)g(data)f(format) +g(used.)946 3739 y(Sp)r(eci\014ed)i(as:)36 b(in)n(teger)27 +b(arra)n(y)e(of)i(length)h(10.)739 3907 y Fm(FID)m(A)42 +b Fl(De\014nes)28 b(the)g(format)f(of)h(the)g(distributed)g(sparse)e +(matrix.)946 4006 y(Sp)r(eci\014ed)j(as:)36 b(a)27 b(string)g(of)h +(length)f(5)739 4174 y Fm(DESCRA)41 b Fl(Describ)r(e)28 +b(the)g(c)n(haracteristic)d(of)j(the)g(distributed)g(sparse)e(matrix.) +946 4273 y(Sp)r(eci\014ed)j(as:)36 b(arra)n(y)25 b(of)j(c)n(haracter)d +(of)j(length)f(9.)739 4440 y Fm(PL)42 b Fl(Sp)r(eci\014es)30 +b(the)g(lo)r(cal)e(ro)n(w)h(p)r(erm)n(utation)g(of)g(distributed)h +(sparse)e(matrix.)42 b(If)30 b(PL\(1\))946 4540 y(is)e(equal)f(to)g(0,) +h(then)g(there)f(isn't)h(ro)n(w)f(p)r(erm)n(utation.)946 +4640 y(Sp)r(eci\014ed)39 b(as:)55 b(p)r(oin)n(ter)37 +b(to)h(in)n(teger)e(arra)n(y)f(of)j(dimension)f(equal)g(to)g(n)n(um)n +(b)r(er)g(of)946 4739 y(lo)r(cal)27 b(ro)n(w)g(\(matrix)p +1582 4739 V 29 w(data[N)p 1858 4739 V 30 w(R)n(O)n(W)p +2095 4739 V 29 w(]\))739 4907 y Fm(PR)41 b Fl(Sp)r(eci\014es)e(the)g +(lo)r(cal)e(column)i(p)r(erm)n(utation)f(of)g(distributed)h(sparse)e +(matrix.)69 b(If)946 5006 y(PR\(1\))28 b(is)f(equal)g(to)h(0,)f(then)h +(there)g(isn't)g(columnm)f(p)r(erm)n(utation.)2150 5255 +y(6)p eop +%%Page: 7 7 +7 6 bop 498 523 a Fl(Sp)r(eci\014ed)38 b(as:)56 b(p)r(oin)n(ter)37 +b(to)g(in)n(teger)g(arra)n(y)e(of)i(dimension)h(equal)f(to)g(n)n(um)n +(b)r(er)g(of)498 623 y(lo)r(cal)27 b(ro)n(w)g(\(matrix)p +1134 623 25 4 v 29 w(data[N)p 1410 623 V 30 w(COL)p 1617 +623 V 29 w(]\))291 789 y Fm(M)41 b Fl(Num)n(b)r(er)19 +b(of)g(ro)n(ws;)i(if)f(ro)n(w)d(indices)j(are)e(stored)g(explicitly)-7 +b(,)21 b(as)e(in)g(Co)r(ordinate)f(Storage,)498 888 y(should)g(b)r(e)h +(greater)e(than)i(or)f(equal)g(to)g(the)h(maxim)n(um)g(ro)n(w)e(index)i +(actually)f(presen)n(t)498 988 y(in)28 b(the)g(sparse)e(matrix.)37 +b(Sp)r(eci\014ed)28 b(as:)36 b(in)n(teger)27 b(v)-5 b(ariable.)291 +1154 y Fm(K)41 b Fl(Num)n(b)r(er)26 b(of)f(columns;)i(if)f(column)f +(indices)h(are)f(stored)g(explicitly)-7 b(,)26 b(as)f(in)h(Co)r +(ordinate)498 1254 y(Storage)17 b(or)h(Compressed)g(Sparse)g(Ro)n(ws,)h +(should)g(b)r(e)g(greater)e(than)i(or)f(equal)g(to)h(the)498 +1353 y(maxim)n(um)31 b(column)g(index)g(actually)f(presen)n(t)h(in)g +(the)g(sparse)f(matrix.)46 b(Sp)r(eci\014ed)498 1453 +y(as:)36 b(in)n(teger)27 b(v)-5 b(ariable.)291 1619 y(V)e(alues)27 +b(assumed)g(b)n(y)g(this)h(\014elds)g(are)e(compatible)i(with)g(ref.)37 +b(1)27 b(\(see)g Fg(x)h Fl(4\).)291 1719 y(F)n(OR)-7 +b(TRAN90)25 b(in)n(terface)h(for)g(distributed)h(sparse)e(matrices)h +(con)n(taining)f(double)i(preci-)291 1818 y(sion)g(real)f(en)n(tries)h +(is)h(de\014ned)g(as)f(follo)n(ws:)291 1984 y Fj(TYPE)42 +b(D_SPMAT)509 2084 y(INTEGER)476 b(::)43 b(M,)g(K)509 +2183 y(CHARACTER\(LEN=5)o(\))81 b(::)43 b(FIDA)509 2283 +y(CHARACTER\(LEN=1)o(1\))37 b(::)43 b(DESCRA)509 2383 +y(INTEGER)476 b(::)43 b(INFOA\(10\))509 2482 y(REAL\(KIND\(1.D0\))o +(\),)37 b(POINTER)k(::)i(ASPK\(:\))509 2582 y(INTEGER,)d(POINTER)g(::)j +(IA1\(:\),)e(IA2\(:\))509 2682 y(INTEGER,)f(POINTER)g(::)j(PL\(:\),)e +(PR\(:\))291 2781 y(END)h(TYPE)g(D_SPMAT)291 2947 y Fl(The)27 +b(follo)n(wing)g(t)n(w)n(o)g(cases)f(are)h(used)g(in)h(the)g(data)f +(insertion)g(routines:)291 3113 y Fm(FID)m(A=\\CSR")42 +b Fl(Compressed)34 b(storage)e(ro)n(ws.)57 b(In)35 b(this)g(case)f(the) +h(follo)n(wing)f(should)498 3213 y(hold:)575 3379 y(1.)41 +b Fj(IA2\(I\))d Fl(con)n(tains)i(the)h(index)f(of)h(the)g(\014rst)f +(elemen)n(t)h(of)g(ro)n(w)e Fj(I)p Fl(;)h(the)h(last)681 +3479 y(elemen)n(t)20 b(of)g(the)g(sparse)f(matrix)g(is)h(th)n(us)g +(stored)f(at)h(index)g Fh(I)7 b(A)p Fl(2\()p Fh(M)12 +b Fl(+)s(1\))s Fg(\000)s Fl(1.)33 b(It)681 3578 y(should)20 +b(con)n(tain)g Fj(M+1)f Fl(en)n(tries)h(in)h(nondecreasing)e(order)g +(\(strictly)h(increasing,)681 3678 y(if)28 b(there)f(are)g(no)g(empt)n +(y)h(ro)n(ws\).)575 3811 y(2.)41 b Fj(IA1\(J\))28 b Fl(con)n(tains)j +(the)g(column)g(index)g(and)g Fj(ASPK\(J\))d Fl(con)n(tains)i(the)i +(corre-)681 3910 y(sp)r(onding)27 b(co)r(e\016cien)n(t)h(v)-5 +b(alue,)27 b(for)g(all)g Fh(I)7 b(A)p Fl(2\(1\))23 b +Fg(\024)g Fh(J)31 b Fg(\024)23 b Fh(I)7 b(A)p Fl(2\()p +Fh(M)27 b Fl(+)18 b(1\))g Fg(\000)g Fl(1.)291 4076 y +Fm(FID)m(A=\\COO")42 b Fl(Co)r(ordinate)26 b(storage.)35 +b(In)28 b(this)g(case)f(the)h(follo)n(wing)e(should)i(hold:)575 +4242 y(1.)41 b Fj(INFOA\(1\))24 b Fl(con)n(tains)j(the)h(n)n(um)n(b)r +(er)f(of)h(nonzero)e(elemen)n(ts)h(in)h(the)g(matrix;)575 +4375 y(2.)41 b(F)-7 b(or)31 b(all)g(1)f Fg(\024)g Fh(J)38 +b Fg(\024)29 b Fh(I)7 b(N)i(F)j(O)r(A)p Fl(\(1\),)34 +b(the)e(co)r(e\016cien)n(t,)h(ro)n(w)d(index)i(and)g(column)681 +4475 y(index)27 b(are)g(stored)g(in)n(to)g Fj(APSK\(J\))p +Fl(,)e Fj(IA1\(J\))g Fl(and)j Fj(IA2\(J\))d Fl(resp)r(ectiv)n(ely)-7 +b(.)1702 5255 y(7)p eop +%%Page: 8 8 +8 7 bop 739 1146 a Fk(Chapter)65 b(2)739 1561 y Fn(F90)78 +b(PSBLAS)g(Library)739 1993 y Fl(Routines)20 b(in)g(this)h(c)n(hapter)e +(pro)n(vide)g(a)h(con)n(v)n(enien)n(t)f(F90)g(in)n(terface)g(to)h(the)h +(computational)739 2092 y(k)n(ernels)37 b(of)h(PSBLAS;)f(they)h(hide)h +(most)e(of)h(the)h(details)e(dep)r(ending)h(on)g(the)g(parallel)739 +2192 y(programming)16 b(en)n(vironmen)n(t)i(and)g(on)h(sparse)e(matrix) +h(represen)n(tation.)32 b(Their)18 b(in)n(terfaces)739 +2291 y(are)26 b(de\014ned)i(in)g(the)g(mo)r(dule)g Fj(F90PSBLAS)p +Fl(.)739 2524 y Fi(Con)m(v)m(en)m(tions)37 b(on)h(routines)e(name)739 +2677 y Fl(The)28 b(name)f(of)g(a)h(subroutine)f(follo)n(w)g(these)g +(con)n(v)n(en)n(tions:)840 2843 y(1.)41 b(First)28 b(\014v)n(e)f(c)n +(haracters)e(are)i Fm(F90)p 1997 2843 29 4 v 34 w(PS)h +Fl(\(that)g(stands)f(for)g Ff(Par)l(al)t(lel)32 b(Sp)l(arse)p +Fl(\))840 3009 y(2.)41 b(Last)28 b(c)n(haracters)d(describ)r(es)i(the)h +(function)g(p)r(erformed)f(b)n(y)g(subroutine.)946 3142 +y(The)20 b(in)n(terfaces)f(are)g(constructed)g(so)h(that)g(the)g +(compiler)f(substitute)i(all)e(in)n(terfaces)946 3242 +y(with)29 b(the)f(e\013ectiv)n(e)f(routines)g(accordingly)f(with)i(t)n +(yp)r(e)g(of)f(parameters.)2150 5255 y(8)p eop +%%Page: 9 9 +9 8 bop 291 752 a Fe(F90)p 518 752 41 4 v 48 w(PSAXPBY|General)44 +b(Dense)h(Matrix)h(Sum)291 1013 y Fl(This)36 b(subroutine)f(is)h(an)g +(in)n(terface)g(to)g(the)g(computational)g(k)n(ernel)f(for)g(dense)h +(matrix)291 1112 y(sum:)1441 1222 y Fh(Y)42 b Fg( )23 +b Fh(\013)c(X)25 b Fl(+)18 b Fh(\014)t(Y)291 1381 y Fl(where:)291 +1557 y Fh(X)47 b Fl(represen)n(ts)27 b(the)g(global)g(dense)g +(submatrix)g Fh(X)1876 1569 y Fd(:)p Fc(;j)s(x)p Fd(:)p +Fc(j)s(x)p Fd(+)p Fc(n)p Fb(\000)p Fd(1)291 1736 y Fh(Y)60 +b Fl(represen)n(ts)26 b(the)i(global)e(dense)i(submatrix)f +Fh(Y)1847 1748 y Fd(:)p Fc(;j)s(y)r Fd(:)p Fc(j)s(y)r +Fd(+)p Fc(n)p Fb(\000)p Fd(1)291 2028 y Fe(Syn)l(tax)723 +2212 y Fl(CALL)h(F90)p 1120 2212 25 4 v 29 w(PSAXPBY)f(\()p +Ff(alpha,)33 b(x,)d(b)l(eta,)g(y,)g(de)l(c)l(omp)p 2508 +2212 26 4 v 32 w(data)p Fl(\))552 2412 y(CALL)e(F90)p +949 2412 25 4 v 29 w(PSAXPBY)g(\()p Ff(alpha,)k(x,)e(b)l(eta,)g(y,)g +(de)l(c)l(omp)p 2337 2412 26 4 v 32 w(data,)h(n,)f(jx,)g(jy)p +Fl(\))p 894 2641 1658 4 v 944 2711 a Fh(X)7 b Fl(,)27 +b Fh(Y)19 b Fl(,)27 b Fh(\013)p Fl(,)h Fh(\014)592 b +Fm(Subroutine)p 894 2744 V 944 2814 a Fl(Long)27 b(Precision)f(Real)257 +b(F90)p 2073 2814 25 4 v 29 w(PSAXPBY)944 2914 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2073 2914 V 29 w(PSAXPBY)p +894 2947 1658 4 v 1326 3179 a(T)-7 b(able)28 b(2.1:)36 +b(Data)27 b(t)n(yp)r(es)291 3473 y Fm(On)k(En)m(try)291 +3652 y(alpha)42 b Fl(the)27 b(scalar)f Fh(\013)p Fl(.)498 +3751 y(Scop)r(e:)37 b Fm(global)498 3851 y Fl(T)n(yp)r(e:)g +Fm(required)498 3951 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h(indicated)f +(in)h(T)-7 b(able)27 b(2.1.)291 4130 y Fm(x)41 b Fl(the)28 +b(lo)r(cal)f(p)r(ortion)h(of)f(global)g(dense)g(matrix)g +Fh(X)7 b Fl(.)498 4229 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4329 y Fl(T)n(yp)r(e:)g Fm(required)498 4429 y Fl(Sp)r(eci\014ed)25 +b(as:)34 b(a)24 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(con)n +(taining)i(n)n(um)n(b)r(ers)h(of)f(t)n(yp)r(e)i(sp)r(eci\014ed)498 +4528 y(in)j(T)-7 b(able)27 b(2.1.)36 b(The)28 b(rank)f(of)g +Fh(x)h Fl(m)n(ust)g(b)r(e)g(the)g(same)f(of)h Fh(y)s +Fl(.)291 4707 y Fm(b)s(eta)41 b Fl(the)28 b(scalar)e +Fh(\014)t Fl(.)498 4807 y(Scop)r(e:)37 b Fm(global)498 +4907 y Fl(T)n(yp)r(e:)g Fm(required)498 5006 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h +(indicated)f(in)h(T)-7 b(able)27 b(2.1.)1702 5255 y(9)p +eop +%%Page: 10 10 +10 9 bop 739 523 a Fm(y)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g +(the)h(global)f(dense)g(matrix)g Fh(Y)19 b Fl(.)946 623 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 722 y Fl(T)n(yp)r(e:)g +Fm(required)946 822 y Fl(Sp)r(eci\014ed)d(as:)46 b(a)32 +b(rank)g(one)g(or)f(t)n(w)n(o)h(arra)n(y)e(with)j(the)g(POINTER)f +(attributecon-)946 922 y(taining)g(n)n(um)n(b)r(ers)f(of)g(the)h(t)n +(yp)r(e)g(indicated)f(in)h(T)-7 b(able)31 b(2.1.)48 b(The)32 +b(rank)e(of)i Fh(y)i Fl(m)n(ust)946 1021 y(b)r(e)28 b(the)g(same)f(of)h +Fh(x)p Fl(.)739 1187 y Fm(decomp)p 1066 1187 29 4 v 33 +w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 1287 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +1386 y Fl(T)n(yp)r(e:)g Fm(required)946 1486 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 1652 y Fm(n)41 b Fl(n)n(um)n(b)r(er)28 +b(of)f(columns)h(in)g(dense)f(submatrices)g Fh(X)34 b +Fl(and)27 b Fh(Y)19 b Fl(.)946 1752 y(Scop)r(e:)37 b +Fm(global)946 1851 y Fl(T)n(yp)r(e:)g Fm(optional)p Fl(;)27 +b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i Fh(x)f Fl(and)f +Fh(y)j Fl(are)d(of)h(rank)e(2.)946 1951 y(Default:)38 +b Fj(min\(size\(X,2\),si)o(ze\()o(Y,)o(2\)\))o Fl(.)946 +2051 y(Sp)r(eci\014ed)29 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b Fh(n)d Fg(\025)g Fl(0.)739 2217 y Fm(jx)42 +b Fl(the)27 b(column)g(index)g(of)g(the)g(global)f(dense)h(matrix)f +Fh(X)7 b Fl(,)26 b(iden)n(tifying)h(the)h(\014rst)e(column)946 +2316 y(of)i(the)g(submatrix)f Fh(X)7 b Fl(.)946 2416 +y(Scop)r(e:)37 b Fm(global)946 2516 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(and)f Fh(y)j Fl(are)d(of)h(rank)e(2.)946 2615 +y(Default:)38 b Fh(j)5 b(x)24 b Fl(=)e(1.)946 2715 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)f Fl(1.)739 2881 y Fm(jy)42 b Fl(the)28 +b(column)g(index)f(of)h(the)g(global)e(dense)i(matrix)f +Fh(Y)18 b Fl(,)28 b(iden)n(tifying)g(the)g(\014rst)f(column)946 +2980 y(of)h(the)g(submatrix)f Fh(Y)19 b Fl(.)946 3080 +y(Scop)r(e:)37 b Fm(global)946 3180 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(and)f Fh(y)j Fl(are)d(of)h(rank)e(2.)946 3279 +y(Default:)38 b Fh(j)5 b(y)26 b Fl(=)c(1.)946 3379 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(y)26 b Fg(\025)d Fl(1.)739 3562 y Fm(On)31 b(Return)739 +3728 y(y)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(result)h +(submatrix)f Fh(Y)18 b Fl(.)946 3827 y(Scop)r(e:)37 b +Fm(lo)s(cal)946 3927 y Fl(T)n(yp)r(e:)g Fm(required)946 +4027 y Fl(Sp)r(eci\014ed)j(as:)60 b(a)38 b(rank)g(one)h(or)f(t)n(w)n(o) +h(arra)n(y)e(con)n(taining)h(n)n(um)n(b)r(ers)g(of)h(the)h(t)n(yp)r(e) +946 4126 y(indicated)28 b(in)g(T)-7 b(able)27 b(2.1.)2130 +5255 y(10)p eop +%%Page: 11 11 +11 10 bop 291 734 a Fe(F90)p 518 734 41 4 v 48 w(PSDOT|Dot)45 +b(Pro)t(duct)291 977 y Fl(This)27 b(function)h(computes)g(dot)f(pro)r +(duct)h(b)r(et)n(w)n(een)f(t)n(w)n(o)g(v)n(ectors)f Fh(X)34 +b Fl(and)28 b Fh(Y)18 b Fl(.)291 1077 y(If)28 b Fh(X)34 +b Fl(and)27 b Fh(Y)46 b Fl(are)27 b(double)g(precision)g(or)g(complex)g +(v)n(ectors)f(computes)h(dot-pro)r(duct)h(as:)1504 1254 +y Fh(dot)c Fg( )f Fh(X)1823 1219 y Fc(T)1874 1254 y Fh(Y)291 +1430 y Fl(Else)18 b(if)i Fh(X)26 b Fl(and)19 b Fh(Y)39 +b Fl(are)18 b(double)i(precision)e(complex)h(v)n(ectors)f(then)i +(computes)f(dot-pro)r(duct)291 1530 y(as:)1499 1630 y +Fh(dot)k Fg( )h Fh(X)1818 1595 y Fc(H)1880 1630 y Fh(Y)291 +1775 y Fl(where:)291 1937 y Fh(X)47 b Fl(represen)n(ts)27 +b(the)g(global)g(sub)n(v)n(ector)f Fh(X)1627 1949 y Fd(:)p +Fc(;j)s(x)291 2100 y Fh(Y)60 b Fl(represen)n(ts)26 b(the)i(global)e +(sub)n(v)n(ector)g Fh(Y)1597 2112 y Fd(:)p Fc(;j)s(y)291 +2374 y Fe(Syn)l(tax)1129 2555 y Fl(F90)p 1272 2555 25 +4 v 29 w(PSDOT)h(\()p Ff(x,)j(y,)h(de)l(c)l(omp)p 2103 +2555 26 4 v 31 w(data)p Fl(\))1009 2755 y(F90)p 1152 +2755 25 4 v 29 w(PSDOT)c(\()p Ff(x,)j(y,)h(de)l(c)l(omp)p +1983 2755 26 4 v 31 w(data,)g(jx,)f(jy)p Fl(\))p 951 +2976 1544 4 v 1001 3045 a Fh(dot)p Fl(,)e Fh(X)7 b Fl(,)27 +b Fh(Y)648 b Fm(F)-8 b(unction)p 951 3079 V 1001 3148 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2130 3148 +25 4 v 29 w(PSDOT)1001 3248 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2130 3248 V 29 w(PSDOT)p 951 3281 1544 4 v 1326 +3513 a(T)-7 b(able)28 b(2.2:)36 b(Data)27 b(t)n(yp)r(es)291 +3782 y Fm(On)k(En)m(try)291 3946 y(x)41 b Fl(the)h(lo)r(cal)e(p)r +(ortion)g(of)g(global)g(dense)h(matrix)f Fh(X)7 b Fl(.)75 +b(This)41 b(function)g(computes)g(the)498 4046 y(lo)r(cation)30 +b(of)h(the)g(\014rst)g(elemen)n(t)g(of)f(lo)r(cal)h(subarra)n(y)d +(used,)k(based)e(on)h Fh(j)5 b(x)31 b Fl(and)g(the)498 +4145 y(\014eld)d Fh(matr)r(ix)p 945 4145 25 4 v 30 w(data)g +Fl(of)g Fh(decomp)p 1532 4145 V 29 w(data)g Fl(.)498 +4245 y(Scop)r(e:)37 b Fm(lo)s(cal)498 4344 y Fl(T)n(yp)r(e:)g +Fm(required)498 4444 y Fl(Sp)r(eci\014ed)28 b(as:)36 +b(a)27 b(p)r(oin)n(ter)g(to)h(arra)n(y)d(of)i(rank)f(one)i(or)e(t)n(w)n +(o)h(con)n(taining)f(n)n(um)n(b)r(ers)h(of)498 4544 y(t)n(yp)r(e)h(sp)r +(eci\014ed)g(in)g(T)-7 b(able)27 b(2.2.)36 b(The)28 b(rank)e(of)i +Fh(x)g Fl(m)n(ust)g(b)r(e)g(the)g(same)f(of)g Fh(y)s +Fl(.)291 4707 y Fm(y)41 b Fl(the)h(lo)r(cal)f(p)r(ortion)g(of)g(global) +f(dense)i(matrix)e Fh(Y)19 b Fl(.)78 b(This)41 b(function)h(computes)g +(the)498 4807 y(lo)r(cation)34 b(of)g(the)g(\014rst)g(elemen)n(t)g(of)g +(lo)r(cal)g(subarra)n(y)e(used,)k(based)d(on)h Fh(iy)s(;)14 +b(j)5 b(y)36 b Fl(and)498 4907 y(the)28 b(\014eld)g Fh(matr)r(ix)p +1088 4907 V 30 w(data)g Fl(of)g Fh(decomp)p 1675 4907 +V 29 w(data)g Fl(.)498 5006 y(Scop)r(e:)37 b Fm(lo)s(cal)1681 +5255 y Fl(11)p eop +%%Page: 12 12 +12 11 bop 946 523 a Fl(T)n(yp)r(e:)37 b Fm(required)946 +623 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(p)r(oin)n(ter)g(to)g(arra) +n(y)e(of)i(rank)g(one)g(or)f(t)n(w)n(o)h(con)n(taining)f(n)n(um)n(b)r +(ers)h(of)946 722 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 +b(able)27 b(2.2.)36 b(The)28 b(rank)f(of)g Fh(y)k Fl(m)n(ust)c(b)r(e)h +(the)g(same)f(of)h Fh(x)p Fl(.)739 888 y Fm(decomp)p +1066 888 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 988 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +1088 y Fl(T)n(yp)r(e:)g Fm(required)946 1187 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 1353 y Fm(jx)42 b Fl(the)30 b(column)h(index)f(of)g +(global)f(dense)h(matrix)f Fh(X)7 b Fl(,)31 b(iden)n(tifying)f(the)g +(column)g(of)g(sub-)946 1453 y(v)n(ector)d Fh(X)7 b Fl(.)946 +1553 y(Scop)r(e:)37 b Fm(global)946 1652 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(and)f Fh(y)j Fl(are)d(of)h(rank)e(2.)946 1752 +y(Default:)38 b Fh(j)5 b(x)24 b Fl(=)e(1.)739 2017 y +Fm(jy)42 b Fl(the)31 b(column)g(index)g(of)g(global)e(dense)i(matrix)f +Fh(Y)19 b Fl(,)32 b(iden)n(tifying)f(the)g(column)g(of)f(sub-)946 +2117 y(v)n(ector)d Fh(Y)18 b Fl(.)946 2217 y(Scop)r(e:)37 +b Fm(global)946 2316 y Fl(T)n(yp)r(e:)g Fm(optional)p +Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i Fh(x)f +Fl(and)f Fh(y)j Fl(are)d(of)h(rank)e(2.)946 2416 y(Default:)38 +b Fh(j)5 b(y)26 b Fl(=)c(1.)946 2516 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(y)26 b Fg(\025)d Fl(1.)739 2682 y Fm(On)31 b(Return)739 +2848 y(F)-8 b(unction)32 b(v)-5 b(alue)41 b Fl(is)28 +b(the)g(dot)f(pro)r(duct)h(of)f(sub)n(v)n(ectors)f Fh(X)34 +b Fl(and)27 b Fh(Y)19 b Fl(.)946 2947 y(Scop)r(e:)37 +b Fm(global)946 3047 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h(indicated)g +(in)f(T)-7 b(able)28 b(2.2.)2130 5255 y(12)p eop +%%Page: 13 13 +13 12 bop 291 749 a Fe(F90)p 518 749 41 4 v 48 w(DOT|Generalized)46 +b(Dot)g(Pro)t(duct)291 1006 y Fl(This)32 b(subroutine)g(computes)g(a)g +(series)g(of)g(dot)h(pro)r(ducts)f(among)f(the)i(columns)f(of)h(t)n(w)n +(o)291 1106 y(dense)27 b(matrices)g Fh(X)34 b Fl(and)27 +b Fh(Y)19 b Fl(:)1303 1293 y Fh(r)r(es)p Fl(\()p Fh(i)p +Fl(\))k Fg( )g Fh(X)7 b Fl(\(:)p Fh(;)14 b(i)p Fl(\))1871 +1259 y Fc(T)1923 1293 y Fh(Y)19 b Fl(\(:)p Fh(;)14 b(i)p +Fl(\))291 1481 y(If)20 b(the)h(matrices)e(are)h(complex,)h(then)g(the)f +(usual)g(con)n(v)n(en)n(tion)f(applies,)i(i.e.)35 b(the)21 +b(conjugate)291 1581 y(transp)r(ose)k(of)i Fh(X)34 b +Fl(is)27 b(used.)36 b(If)28 b Fh(X)33 b Fl(and)27 b Fh(Y)45 +b Fl(are)26 b(of)h(rank)f(one,)h(then)h Fh(r)r(es)f Fl(is)g(a)f +(scalar,)g(else)g(it)291 1680 y(is)h(a)g(rank)g(one)g(arra)n(y)-7 +b(.)291 1968 y Fe(Syn)l(tax)1101 2152 y Fl(F90)p 1244 +2152 25 4 v 29 w(DOT)28 b(\()p Ff(r)l(es,)i(x,)g(y,)g(de)l(c)l(omp)p +2130 2152 26 4 v 32 w(data)p Fl(\))p 950 2380 1546 4 +v 1000 2450 a Fh(r)r(es)p Fl(,)e Fh(X)7 b Fl(,)27 b Fh(Y)644 +b Fm(Subroutine)p 950 2483 V 1000 2553 a Fl(Long)27 b(Precision)f(Real) +257 b(F90)p 2129 2553 25 4 v 29 w(DOT)1000 2652 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2129 2652 V 29 w(DOT)p +950 2685 1546 4 v 1326 2917 a(T)-7 b(able)28 b(2.3:)36 +b(Data)27 b(t)n(yp)r(es)291 3207 y Fm(On)k(En)m(try)291 +3383 y(x)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)h(of)f(global)g(dense)g +(matrix)g Fh(X)7 b Fl(.)498 3483 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3582 y Fl(T)n(yp)r(e:)g Fm(required)498 3682 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h(arra)n(y)d(of)i(rank)f(one)i(or) +e(t)n(w)n(o)h(con)n(taining)f(n)n(um)n(b)r(ers)h(of)498 +3781 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 b(able)27 +b(2.3.)36 b(The)28 b(rank)e(of)i Fh(x)g Fl(m)n(ust)g(b)r(e)g(the)g +(same)f(of)g Fh(y)s Fl(.)291 3957 y Fm(y)41 b Fl(the)28 +b(lo)r(cal)f(p)r(ortion)h(of)f(global)g(dense)g(matrix)g +Fh(Y)19 b Fl(.)498 4057 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4157 y Fl(T)n(yp)r(e:)g Fm(required)498 4256 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h(arra)n(y)d(of)i(rank)f(one)i(or) +e(t)n(w)n(o)h(con)n(taining)f(n)n(um)n(b)r(ers)h(of)498 +4356 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 b(able)27 +b(2.3.)36 b(The)28 b(rank)e(of)i Fh(y)i Fl(m)n(ust)e(b)r(e)g(the)g +(same)f(of)g Fh(x)p Fl(.)291 4532 y Fm(decomp)p 618 4532 +29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g +(comm)n(unications.)498 4631 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4731 y Fl(T)n(yp)r(e:)g Fm(required)498 4830 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g(Sec.)f(1.)291 5006 y Fm(On)k(Return)1681 5255 y Fl(13)p +eop +%%Page: 14 14 +14 13 bop 739 523 a Fm(res)41 b Fl(is)28 b(the)g(dot)f(pro)r(duct)h(of) +f(sub)n(v)n(ectors)f Fh(X)34 b Fl(and)27 b Fh(Y)19 b +Fl(.)946 623 y(Scop)r(e:)37 b Fm(global)946 722 y Fl(Sp)r(eci\014ed)31 +b(as:)40 b(a)29 b(n)n(um)n(b)r(er)h(or)e(a)h(rank-one)f(arra)n(y)g(of)h +(the)h(data)f(t)n(yp)r(e)h(indicated)g(in)946 822 y(T)-7 +b(able)28 b(2.2.)2130 5255 y(14)p eop +%%Page: 15 15 +15 14 bop 291 744 a Fe(F90)p 518 744 41 4 v 48 w(PSAMAX|In\014nit)l +(y-Norm)44 b(of)h(V)-11 b(ector)291 996 y Fl(This)27 +b(function)h(computes)g(the)g(in\014nit)n(y-norm)f(of)g(a)g(v)n(ector)g +Fh(X)7 b Fl(.)291 1096 y(If)28 b Fh(X)34 b Fl(is)27 b(double)h +(precision)e(real)h(v)n(ector)f(computes)i(in\014nit)n(y)g(norm)f(as:) +1409 1281 y Fh(amax)c Fg( )h Fl(max)1812 1333 y Fc(i)1915 +1281 y Fg(j)p Fh(x)1985 1293 y Fc(i)2013 1281 y Fg(j)291 +1493 y Fl(else)j(if)h Fh(X)34 b Fl(is)27 b(double)h(precision)f +(complex)g(v)n(ector)f(then)i(computes)g(in\014nit)n(y-norm)f(as:)1111 +1678 y Fh(amax)c Fg( )g Fl(max)1514 1730 y Fc(i)1617 +1678 y Fl(\()p Fg(j)p Fh(r)r(e)p Fl(\()p Fh(x)1829 1690 +y Fc(i)1858 1678 y Fl(\))p Fg(j)c Fl(+)f Fg(j)p Fh(im)p +Fl(\()p Fh(x)2219 1690 y Fc(i)2247 1678 y Fl(\))p Fg(j)p +Fl(\))291 1889 y(where:)291 2059 y Fh(X)47 b Fl(represen)n(ts)27 +b(the)g(global)g(sub)n(v)n(ector)f Fh(X)1627 2071 y Fd(:)p +Fc(;j)s(x)291 2340 y Fe(Syn)l(tax)1154 2522 y Fl(F90)p +1297 2522 25 4 v 29 w(PSAMAX)i(\()p Ff(x,de)l(c)l(omp)p +2077 2522 26 4 v 32 w(data)p Fl(\))1094 2722 y(F90)p +1237 2722 25 4 v 30 w(PSAMAX)g(\()p Ff(x,de)l(c)l(omp)p +2018 2722 26 4 v 32 w(data,)j(jx)p Fl(\))p 474 2947 2497 +4 v 524 3017 a Fh(amax)620 b(X)917 b Fm(F)-8 b(unction)p +474 3050 V 524 3120 a Fl(Long)27 b(Precision)f(Real)99 +b(Long)27 b(Precision)f(Real)257 b(F90)p 2481 3120 25 +4 v 29 w(PSAMAX)524 3220 y(Long)27 b(Precision)f(Real)99 +b(Long)27 b(Precision)f(Complex)99 b(F90)p 2481 3220 +V 29 w(PSZAMAX)p 474 3253 2497 4 v 1326 3485 a(T)-7 b(able)28 +b(2.4:)36 b(Data)27 b(t)n(yp)r(es)291 3768 y Fm(On)k(En)m(try)291 +3939 y(x)41 b Fl(the)h(lo)r(cal)e(p)r(ortion)g(of)g(global)g(dense)h +(matrix)f Fh(X)7 b Fl(.)75 b(This)41 b(function)g(computes)g(the)498 +4039 y(lo)r(cation)30 b(of)h(the)g(\014rst)g(elemen)n(t)g(of)f(lo)r +(cal)h(subarra)n(y)d(used,)k(based)e(on)h Fh(j)5 b(x)31 +b Fl(and)g(the)498 4138 y(\014eld)d Fh(matr)r(ix)p 945 +4138 25 4 v 30 w(data)g Fl(of)g Fh(decomp)p 1532 4138 +V 29 w(data)g Fl(.)498 4238 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4337 y Fl(T)n(yp)r(e:)g Fm(required)498 4437 y Fl(Sp)r(eci\014ed)25 +b(as:)34 b(a)24 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(con)n +(taining)i(n)n(um)n(b)r(ers)h(of)f(t)n(yp)r(e)i(sp)r(eci\014ed)498 +4537 y(in)j(T)-7 b(able)27 b(2.4.)291 4707 y Fm(decomp)p +618 4707 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)498 4807 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4907 y Fl(T)n(yp)r(e:)g Fm(required)498 5006 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)1681 5255 y(15)p eop +%%Page: 16 16 +16 15 bop 739 523 a Fm(jx)42 b Fl(the)30 b(column)h(index)f(of)g +(global)f(dense)h(matrix)f Fh(X)7 b Fl(,)31 b(iden)n(tifying)f(the)g +(column)g(of)g(sub-)946 623 y(v)n(ector)d Fh(X)7 b Fl(.)946 +722 y(Scop)r(e:)37 b Fm(global)946 822 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(is)f(of)h(rank)e(2.)946 922 y(Default:)38 b +Fh(j)5 b(x)24 b Fl(=)e(1)946 1021 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)f Fl(1.)739 1187 y Fm(On)31 b(Return)739 +1353 y(F)-8 b(unction)32 b(v)-5 b(alue)41 b Fl(is)28 +b(the)g(in\014nit)n(y)g(norm)f(of)g(sub)n(v)n(ector)f +Fh(X)7 b Fl(.)946 1453 y(Scop)r(e:)37 b Fm(global)946 +1553 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(n)n(um)n(b)r(er)g(of)h +(the)g(data)f(t)n(yp)r(e)h(indicated)g(in)f(T)-7 b(able)28 +b(2.4.)2130 5255 y(16)p eop +%%Page: 17 17 +17 16 bop 291 739 a Fe(F90)p 518 739 41 4 v 48 w(AMAX|Generalized)45 +b(In\014nit)l(y)g(Norm)291 987 y Fl(This)29 b(subroutine)h(computes)f +(a)h(series)e(of)i(in\014nit)n(y)g(norms)f(on)g(the)i(columns)e(of)h(a) +f(dense)291 1087 y(matrix)e Fh(X)7 b Fl(:)1320 1186 y +Fh(r)r(es)p Fl(\()p Fh(i)p Fl(\))24 b Fg( )f Fl(max)1719 +1241 y Fc(k)1828 1186 y Fg(j)p Fh(X)7 b Fl(\()p Fh(k)s(;)14 +b(i)p Fl(\))p Fg(j)291 1461 y Fe(Syn)l(tax)1100 1643 +y Fl(F90)p 1243 1643 25 4 v 29 w(AMAX)29 b(\()p Ff(r)l(es,)h(X,)g(de)l +(c)l(omp)p 2132 1643 26 4 v 31 w(data)p Fl(\))p 536 1866 +2374 4 v 586 1936 a Fh(r)r(es)711 b(X)917 b Fm(Subroutine)p +536 1969 V 586 2039 a Fl(Long)27 b(Precision)f(Real)99 +b(Long)27 b(Precision)f(Real)257 b(F90)p 2543 2039 25 +4 v 29 w(AMAX)586 2138 y(Long)27 b(Precision)f(Real)99 +b(Long)27 b(Precision)f(Complex)99 b(F90)p 2543 2138 +V 29 w(AMAX)p 536 2172 2374 4 v 1326 2404 a(T)-7 b(able)28 +b(2.5:)36 b(Data)27 b(t)n(yp)r(es)291 2681 y Fm(On)k(En)m(try)291 +2847 y(x)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)h(of)f(global)g(dense)g +(matrix)g Fh(X)7 b Fl(.)498 2947 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3047 y Fl(T)n(yp)r(e:)g Fm(required)498 3146 y Fl(Sp)r(eci\014ed)25 +b(as:)34 b(a)24 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(con)n +(taining)i(n)n(um)n(b)r(ers)h(of)f(t)n(yp)r(e)i(sp)r(eci\014ed)498 +3246 y(in)j(T)-7 b(able)27 b(2.5.)291 3412 y Fm(decomp)p +618 3412 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)498 3512 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3611 y Fl(T)n(yp)r(e:)g Fm(required)498 3711 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g(Sec.)f(1.)291 3877 y Fm(On)k(Return)291 4043 y(res)41 +b Fl(is)27 b(the)h(in\014nit)n(y)g(norm)f(of)h(the)g(columns)f(of)h +Fh(X)7 b Fl(.)498 4142 y(Scop)r(e:)37 b Fm(global)498 +4242 y Fl(Sp)r(eci\014ed)30 b(as:)41 b(a)29 b(n)n(um)n(b)r(er)g(or)g(a) +g(rank-one)f(arra)n(y)f(of)j(the)g(data)f(t)n(yp)r(e)h(indicated)g(in) +498 4342 y(T)-7 b(able)27 b(2.4.)1681 5255 y(17)p eop +%%Page: 18 18 +18 17 bop 739 749 a Fe(F90)p 966 749 41 4 v 48 w(PSASUM|1-Norm)45 +b(of)g(V)-11 b(ector)739 1007 y Fl(This)27 b(function)i(computes)e(the) +h(1-norm)e(of)i(a)f(v)n(ector)f Fh(X)7 b Fl(.)739 1106 +y(If)28 b Fh(X)34 b Fl(is)27 b(double)h(precision)f(real)f(v)n(ector)g +(computes)i(1-norm)e(as:)1926 1294 y Fh(asum)c Fg( )h(k)p +Fh(x)2347 1306 y Fc(i)2375 1294 y Fg(k)739 1482 y Fl(else)k(if)h +Fh(X)34 b Fl(ic)28 b(double)f(precision)g(complex)g(v)n(ector)f(then)j +(computes)e(1-norm)f(as:)1604 1669 y Fh(asum)c Fg( )h(k)p +Fh(r)r(e)p Fl(\()p Fh(X)7 b Fl(\))p Fg(k)2238 1681 y +Fd(1)2293 1669 y Fl(+)18 b Fg(k)p Fh(im)p Fl(\()p Fh(X)7 +b Fl(\))p Fg(k)2702 1681 y Fd(1)739 1857 y Fl(where:)739 +2031 y Fh(X)48 b Fl(represen)n(ts)26 b(the)i(global)e(sub)n(v)n(ector)g +Fh(X)2075 2043 y Fd(:)p Fc(;j)s(x)739 2318 y Fe(Syn)l(tax)1596 +2502 y Fl(F90)p 1739 2502 25 4 v 29 w(PSASUM)i(\()p Ff(x,)i(de)l(c)l +(omp)p 2533 2502 26 4 v 31 w(data)p Fl(\))1536 2702 y(F90)p +1679 2702 25 4 v 29 w(PSASUM)e(\()p Ff(x,)i(de)l(c)l(omp)p +2473 2702 26 4 v 32 w(data,)h(jx)p Fl(\))p 1370 2930 +1603 4 v 1420 3000 a Fh(dot)p Fl(,)d Fh(X)7 b Fl(,)27 +b Fh(Y)648 b Fm(F)-8 b(unction)p 1370 3033 V 1420 3103 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2549 3103 +25 4 v 29 w(PSASUM)1420 3202 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2549 3202 V 29 w(PSASUM)p 1370 3235 1603 4 v +1775 3467 a(T)-7 b(able)27 b(2.6:)36 b(Data)27 b(t)n(yp)r(es)739 +3758 y Fm(On)k(En)m(try)739 3934 y(x)42 b Fl(the)f(lo)r(cal)f(p)r +(ortion)g(of)h(global)e(dense)i(matrix)f Fh(X)7 b Fl(.)75 +b(This)41 b(function)g(computes)g(the)946 4033 y(lo)r(cation)31 +b(of)f(the)i(\014rst)e(elemen)n(t)h(of)g(lo)r(cal)f(subarra)n(y)e +(used,)k(based)e(on)h Fh(j)5 b(x)31 b Fl(and)g(the)946 +4133 y(\014eld)d Fh(matr)r(ix)p 1393 4133 25 4 v 31 w(data)f +Fl(of)h Fh(decomp)p 1980 4133 V 30 w(data)f Fl(.)946 +4232 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4332 y Fl(T)n(yp)r(e:)g +Fm(required)946 4432 y Fl(Sp)r(eci\014ed)25 b(as:)34 +b(a)24 b(rank)f(one)h(or)f(t)n(w)n(o)g(arra)n(y)e(con)n(taining)i(n)n +(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g(sp)r(eci\014ed)946 +4531 y(in)k(T)-7 b(able)28 b(2.6.)739 4707 y Fm(decomp)p +1066 4707 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 4807 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4907 y Fl(T)n(yp)r(e:)g Fm(required)946 5006 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)2130 5255 y(18)p eop +%%Page: 19 19 +19 18 bop 291 523 a Fm(jx)42 b Fl(the)30 b(column)g(index)g(of)h +(global)d(dense)i(matrix)g Fh(X)7 b Fl(,)30 b(iden)n(tifying)h(the)f +(column)g(of)g(sub-)498 623 y(v)n(ector)c Fh(X)7 b Fl(.)498 +722 y(Scop)r(e:)37 b Fm(global)498 822 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)h +Fh(x)g Fl(is)g(of)f(rank)g(2.)498 922 y(Default:)38 b +Fh(j)5 b(x)23 b Fl(=)g(1)498 1021 y(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)e Fl(1.)291 1187 y Fm(On)31 b(Return)291 +1353 y(F)-8 b(unction)31 b(v)-5 b(alue)42 b Fl(is)27 +b(the)h(1-norm)e(of)i(sub)n(v)n(ector)e Fh(X)7 b Fl(.)498 +1453 y(Scop)r(e:)37 b Fm(global)498 1553 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h +(indicated)f(in)h(T)-7 b(able)27 b(2.6.)1681 5255 y(19)p +eop +%%Page: 20 20 +20 19 bop 739 749 a Fe(F90)p 966 749 41 4 v 48 w(PSNRM2|2-Norm)46 +b(of)f(V)-11 b(ector)739 1007 y Fl(This)27 b(function)i(computes)e(the) +h(2-norm)e(of)i(a)f(v)n(ector)f Fh(X)7 b Fl(.)739 1107 +y(If)28 b Fh(X)34 b Fl(is)27 b(double)h(precision)f(real)f(v)n(ector)g +(computes)i(2-norm)e(as:)1868 1303 y Fh(nr)r(m)p Fl(2)d +Fg( )2202 1226 y(p)p 2271 1226 203 4 v 77 x Fh(X)2347 +1279 y Fc(T)2398 1303 y Fh(X)739 1491 y Fl(else)k(if)h +Fh(X)34 b Fl(is)28 b(double)f(precision)g(complex)g(v)n(ector)f(then)i +(computes)g(2-norm)e(as:)1863 1688 y Fh(nr)r(m)p Fl(2)d +Fg( )2196 1610 y(p)p 2265 1610 214 4 v 78 x Fh(X)2341 +1664 y Fc(H)2404 1688 y Fh(X)739 1876 y Fl(where:)739 +2049 y Fh(X)48 b Fl(represen)n(ts)26 b(the)i(global)e(sub)n(v)n(ector)g +Fh(X)2075 2061 y Fd(:)p Fc(;j)s(x)p 1373 2252 1597 4 +v 1423 2322 a Fh(nr)r(m)p Fl(2,)h Fh(X)663 b Fm(F)-8 +b(unction)p 1373 2355 V 1423 2425 a Fl(Long)26 b(Precision)g(Real)258 +b(F90)p 2552 2425 25 4 v 29 w(PSNRM2)1423 2525 y(Long)26 +b(Precision)g(Complex)100 b(F90)p 2552 2525 V 29 w(PSNRM2)p +1373 2558 1597 4 v 1775 2790 a(T)-7 b(able)27 b(2.7:)36 +b(Data)27 b(t)n(yp)r(es)739 3172 y Fe(Syn)l(tax)1598 +3356 y Fl(F90)p 1741 3356 25 4 v 30 w(PSNRM2)g(\()p Ff(x,)j(de)l(c)l +(omp)p 2530 3356 26 4 v 31 w(data)p Fl(\))1539 3556 y(F90)p +1682 3556 25 4 v 29 w(PSNRM2)d(\()p Ff(x,)j(de)l(c)l(omp)p +2470 3556 26 4 v 32 w(data,)h(jx)p Fl(\))739 3757 y Fm(On)g(En)m(try) +739 3934 y(x)42 b Fl(the)f(lo)r(cal)f(p)r(ortion)g(of)h(global)e(dense) +i(matrix)f Fh(X)7 b Fl(.)75 b(This)41 b(function)g(computes)g(the)946 +4033 y(lo)r(cation)31 b(of)f(the)i(\014rst)e(elemen)n(t)h(of)g(lo)r +(cal)f(subarra)n(y)e(used,)k(based)e(on)h Fh(j)5 b(x)31 +b Fl(and)g(the)946 4133 y(\014eld)d Fh(matr)r(ix)p 1393 +4133 25 4 v 31 w(data)f Fl(of)h Fh(decomp)p 1980 4133 +V 30 w(data)f Fl(.)946 4232 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4332 y Fl(T)n(yp)r(e:)g Fm(required)946 4432 y Fl(Sp)r(eci\014ed)25 +b(as:)34 b(a)24 b(rank)f(one)h(or)f(t)n(w)n(o)g(arra)n(y)e(con)n +(taining)i(n)n(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g(sp)r(eci\014ed)946 +4531 y(in)k(T)-7 b(able)28 b(2.7.)739 4707 y Fm(decomp)p +1066 4707 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 4807 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4907 y Fl(T)n(yp)r(e:)g Fm(required)946 5006 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)2130 5255 y(20)p eop +%%Page: 21 21 +21 20 bop 291 523 a Fm(jx)42 b Fl(the)30 b(column)g(index)g(of)h +(global)d(dense)i(matrix)g Fh(X)7 b Fl(,)30 b(iden)n(tifying)h(the)f +(column)g(of)g(sub-)498 623 y(v)n(ector)c Fh(X)7 b Fl(.)498 +722 y(Scop)r(e:)37 b Fm(global)498 822 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)h +Fh(x)g Fl(is)g(of)f(rank)g(2.)498 922 y(Default:)38 b +Fh(j)5 b(x)23 b Fl(=)g(1)498 1021 y(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)e Fl(1.)291 1187 y Fm(On)31 b(Return)291 +1353 y(F)-8 b(unction)31 b(V)-8 b(alue)42 b Fl(is)27 +b(the)h(2-norm)f(of)g(sub)n(v)n(ector)f Fh(X)7 b Fl(.)498 +1453 y(Scop)r(e:)37 b Fm(global)498 1553 y Fl(T)n(yp)r(e:)g +Fm(required)498 1652 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h(indicated)f +(in)h(T)-7 b(able)27 b(2.7.)1681 5255 y(21)p eop +%%Page: 22 22 +22 21 bop 739 739 a Fe(F90)p 966 739 41 4 v 48 w(PSNRMI|In\014nit)l(y) +46 b(Norm)f(of)g(Sparse)g(Matrix)739 987 y Fl(This)27 +b(function)i(computes)e(the)h(in\014nit)n(y-norm)f(of)h(a)f(matrix)g +Fh(A)p Fl(:)1903 1269 y Fh(nr)r(mi)c Fg( )g(k)p Fh(A)p +Fg(k)2369 1281 y Fb(1)739 1452 y Fl(where:)739 1618 y +Fh(A)42 b Fl(represen)n(ts)26 b(the)i(global)e(matrix)h +Fh(A)p 1379 1804 1585 4 v 1428 1873 a(nr)r(mi)p Fl(,)h +Fh(A)682 b Fm(F)-8 b(unction)p 1379 1907 V 1428 1976 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2557 1976 +25 4 v 29 w(PSNRMI)1428 2076 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2557 2076 V 29 w(PSNRMI)p 1379 2109 1585 4 v +1775 2341 a(T)-7 b(able)27 b(2.8:)36 b(Data)27 b(t)n(yp)r(es)739 +2715 y Fe(Syn)l(tax)1593 2897 y Fl(F90)p 1736 2897 25 +4 v 29 w(PSNRMI)g(\()p Ff(A,)k(de)l(c)l(omp)p 2536 2897 +26 4 v 31 w(data)p Fl(\))739 3096 y Fm(On)g(En)m(try)739 +3262 y(a)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(the)h(global)f +(sparse)f(matrix)h Fh(A)p Fl(.)946 3362 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 3462 y Fl(T)n(yp)r(e:)g Fm(required)946 +3561 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)739 3727 +y Fm(decomp)p 1066 3727 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 3827 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 3927 y Fl(T)n(yp)r(e:)g +Fm(required)946 4026 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 4192 y Fm(On)k(Return)739 4358 y(F)-8 +b(unction)32 b(v)-5 b(alue)41 b Fl(is)28 b(the)g(in\014nit)n(y-norm)f +(of)g(sparse)f(submatrix)h Fh(A)p Fl(.)946 4458 y(Scop)r(e:)37 +b Fm(global)946 4558 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h(indicated)g +(in)f(T)-7 b(able)28 b(2.8.)2130 5255 y(22)p eop +%%Page: 23 23 +23 22 bop 291 734 a Fe(F90)p 518 734 41 4 v 48 w(PSSPMM|Sparse)43 +b(Matrix)i(b)l(y)f(Dense)h(Matrix)291 883 y(Pro)t(duct)291 +1127 y Fl(This)27 b(subroutine)g(computes)h(the)g(Sparse)e(Matrix)h(b)n +(y)h(Dense)f(Matrix)g(Pro)r(duct:)1331 1326 y Fh(Y)42 +b Fg( )23 b Fh(\013P)1633 1338 y Fc(r)1670 1326 y Fh(AP)1785 +1338 y Fc(c)1820 1326 y Fh(X)h Fl(+)18 b Fh(\014)t(Y)889 +b Fl(\(2.1\))1305 1472 y Fh(Y)42 b Fg( )23 b Fh(\013P)1607 +1484 y Fc(r)1644 1472 y Fh(A)1706 1437 y Fc(T)1759 1472 +y Fh(P)1812 1484 y Fc(c)1846 1472 y Fh(X)i Fl(+)18 b +Fh(\014)t(Y)862 b Fl(\(2.2\))1300 1617 y Fh(Y)41 b Fg( )23 +b Fh(\013P)1601 1629 y Fc(r)1639 1617 y Fh(A)1701 1583 +y Fc(H)1764 1617 y Fh(P)1817 1629 y Fc(c)1851 1617 y +Fh(X)i Fl(+)18 b Fh(\014)t(Y)857 b Fl(\(2.3\))415 1763 +y(where:)291 1924 y Fh(X)47 b Fl(is)28 b(the)g(global)e(dense)i +(submatrix)f Fh(X)1569 1936 y Fd(:)p Fc(;j)s(x)p Fd(:)p +Fc(j)s(x)p Fd(+)p Fc(k)q Fb(\000)p Fd(1)291 2088 y Fh(Y)60 +b Fl(is)27 b(the)h(global)f(dense)g(submatrix)g Fh(Y)1539 +2100 y Fd(:)p Fc(;j)s(y)r Fd(:)p Fc(j)s(y)r Fd(+)p Fc(k)q +Fb(\000)p Fd(1)291 2251 y Fh(A)41 b Fl(is)28 b(the)g(global)e(sparse)g +(submatrix)i Fh(A)291 2415 y(P)344 2427 y Fc(r)381 2415 +y Fh(;)14 b(P)471 2427 y Fc(c)546 2415 y Fl(are)27 b(the)h(p)r(erm)n +(utation)f(matrices.)p 918 2593 1611 4 v 967 2663 a Fh(A)p +Fl(,)h Fh(X)7 b Fl(,)28 b Fh(Y)18 b Fl(,)28 b Fh(\013)p +Fl(,)g Fh(\014)478 b Fm(Subroutine)p 918 2696 V 967 2766 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2096 2766 +25 4 v 29 w(PSSPMM)967 2865 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2096 2865 V 29 w(PSSPMM)p 918 2898 1611 4 v 1326 +3130 a(T)-7 b(able)28 b(2.9:)36 b(Data)27 b(t)n(yp)r(es)291 +3501 y Fe(Syn)l(tax)698 3683 y Fl(CALL)h(F90)p 1095 3683 +25 4 v 29 w(PSSPMM)f(\()p Ff(alpha,)32 b(a,)f(x,)f(b)l(eta,)g(y,)g(de)l +(c)l(omp)p 2533 3683 26 4 v 32 w(data)p Fl(\))315 3882 +y(CALL)e(F90)p 712 3882 25 4 v 29 w(PSSPMM)f(\()p Ff(alpha,)32 +b(a,)f(x,)f(b)l(eta,)g(y,de)l(c)l(omp)p 2120 3882 26 +4 v 32 w(data,)h(tr)l(ans,)f(k,)g(jx,)g(jy,)h(work)p +Fl(\))291 4081 y Fm(On)g(En)m(try)291 4245 y(alpha)42 +b Fl(the)27 b(scalar)f Fh(\013)p Fl(.)498 4345 y(Scop)r(e:)37 +b Fm(global)498 4444 y Fl(T)n(yp)r(e:)g Fm(required)498 +4544 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h +(the)g(data)f(t)n(yp)r(e)h(indicated)f(in)h(T)-7 b(able)27 +b(2.9.)291 4707 y Fm(a)41 b Fl(the)28 b(lo)r(cal)g(p)r(ortion)f(of)g +(the)h(sparse)e(matrix)h Fh(A)p Fl(.)498 4807 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 4907 y Fl(T)n(yp)r(e:)g Fm(required)498 +5006 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)1681 5255 +y(23)p eop +%%Page: 24 24 +24 23 bop 739 523 a Fm(x)42 b Fl(the)34 b(lo)r(cal)g(p)r(ortion)f(of)h +(global)f(dense)h(matrix)g Fh(X)7 b Fl(.)55 b(This)34 +b(subroutine)g(computes)g(the)946 623 y(lo)r(cation)d(of)f(the)i +(\014rst)e(elemen)n(t)h(of)g(lo)r(cal)f(subarra)n(y)e(used,)k(based)e +(on)h Fh(j)5 b(x)31 b Fl(and)g(the)946 722 y(\014eld)d +Fh(matr)r(ix)p 1393 722 25 4 v 31 w(data)f Fl(of)h Fh(decomp)p +1980 722 V 30 w(data)f Fl(.)946 822 y(Scop)r(e:)37 b +Fm(lo)s(cal)946 922 y Fl(T)n(yp)r(e:)g Fm(required)946 +1021 y Fl(Sp)r(eci\014ed)25 b(as:)34 b(a)24 b(rank)f(one)h(or)f(t)n(w)n +(o)g(arra)n(y)e(con)n(taining)i(n)n(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g +(sp)r(eci\014ed)946 1121 y(in)k(T)-7 b(able)28 b(2.9.)36 +b(The)27 b(rank)g(of)h Fh(x)g Fl(m)n(ust)f(b)r(e)h(the)g(same)f(of)h +Fh(y)s Fl(.)739 1277 y Fm(b)s(eta)41 b Fl(the)28 b(scalar)e +Fh(\014)t Fl(.)946 1377 y(Scop)r(e:)37 b Fm(global)946 +1476 y Fl(T)n(yp)r(e:)g Fm(required)946 1576 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h +(indicated)g(in)f(T)-7 b(able)28 b(2.9.)739 1732 y Fm(y)42 +b Fl(the)35 b(lo)r(cal)f(p)r(ortion)g(of)h(global)f(dense)g(matrix)g +Fh(Y)19 b Fl(.)58 b(This)35 b(subroutine)f(computes)h(the)946 +1831 y(lo)r(cation)c(of)g(the)g(\014rst)g(elemen)n(t)g(of)h(lo)r(cal)e +(subarra)n(y)f(used,)j(based)e(on)h Fh(j)5 b(y)34 b Fl(and)d(the)946 +1931 y(\014eld)d Fh(matr)r(ix)p 1393 1931 V 31 w(data)f +Fl(of)h Fh(decomp)p 1980 1931 V 30 w(data)f Fl(.)946 +2031 y(Scop)r(e:)37 b Fm(lo)s(cal)946 2130 y Fl(T)n(yp)r(e:)g +Fm(required)946 2230 y Fl(Sp)r(eci\014ed)25 b(as:)34 +b(a)24 b(rank)f(one)h(or)f(t)n(w)n(o)g(arra)n(y)e(con)n(taining)i(n)n +(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g(sp)r(eci\014ed)946 +2330 y(in)k(T)-7 b(able)28 b(2.9.)36 b(The)27 b(rank)g(of)h +Fh(y)i Fl(m)n(ust)e(b)r(e)g(the)g(same)f(of)g Fh(x)p +Fl(.)739 2486 y Fm(decomp)p 1066 2486 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)946 +2585 y(Scop)r(e:)37 b Fm(lo)s(cal)946 2685 y Fl(T)n(yp)r(e:)g +Fm(required)946 2785 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 2941 y Fm(trans)42 b Fl(indicate)28 +b(what)f(kind)h(of)g(op)r(eration)e(to)i(p)r(erform.)946 +3097 y Fm(trans)33 b(=)f(N)41 b Fl(the)28 b(op)r(eration)f(is)g(sp)r +(eci\014ed)h(b)n(y)f(equation)g(2.1)946 3220 y Fm(trans)33 +b(=)f(T)42 b Fl(the)28 b(op)r(eration)e(is)i(sp)r(eci\014ed)g(b)n(y)f +(equation)g(2.2)946 3342 y Fm(trans)33 b(=)f(C)41 b Fl(the)28 +b(op)r(eration)f(is)g(sp)r(eci\014ed)h(b)n(y)g(equation)f(2.3)946 +3499 y(Scop)r(e:)37 b Fm(global)946 3598 y Fl(T)n(yp)r(e:)g +Fm(optional)946 3698 y Fl(Default:)h Fh(tr)r(ans)24 b +Fl(=)e Fh(N)946 3797 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(c)n(haracter)f(v)-5 b(ariable.)739 3954 y Fm(k)42 +b Fl(n)n(um)n(b)r(er)27 b(of)h(columns)f(in)h(dense)f(submatrices)g +Fh(X)34 b Fl(and)28 b Fh(Y)18 b Fl(.)946 4053 y(Scop)r(e:)37 +b Fm(global)946 4153 y Fl(T)n(yp)r(e:)g Fm(optional)946 +4252 y Fl(Default:)h Fj(min\(size\(x,2\)-jx)o(+1,)o(si)o(ze\()o(y,)o +(2\))o(-jy)o(+1)o(\))946 4352 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(k)g +Fg(\025)d Fl(1.)739 4508 y Fm(jx)42 b Fl(the)30 b(column)h(index)f(of)g +(global)f(dense)h(matrix)f Fh(X)7 b Fl(,)31 b(iden)n(tifying)f(the)g +(column)g(of)g(sub-)946 4608 y(v)n(ector)d Fh(X)7 b Fl(.)946 +4707 y(Scop)r(e:)37 b Fm(global)946 4807 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(is)f(of)h(rank)e(2.)946 4907 y(Default:)38 +b Fh(iy)25 b Fl(=)e(1)946 5006 y(Sp)r(eci\014ed)29 b(as:)36 +b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 b(x)24 +b Fg(\025)f Fl(1.)2130 5255 y(24)p eop +%%Page: 25 25 +25 24 bop 291 523 a Fm(jy)42 b Fl(the)31 b(column)g(index)g(of)f +(global)g(dense)h(matrix)f Fh(Y)19 b Fl(,)31 b(iden)n(tifying)g(the)h +(column)e(of)h(sub-)498 623 y(v)n(ector)26 b Fh(Y)19 +b Fl(.)498 722 y(Scop)r(e:)37 b Fm(global)498 822 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)h +Fh(y)j Fl(is)c(of)h(rank)e(2.)498 922 y(Default:)38 b +Fh(j)5 b(y)25 b Fl(=)e(1)498 1021 y(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(y)26 b Fg(\025)c Fl(1.)291 1187 y Fm(w)m(ork)42 b Fl(the)28 +b(w)n(ork)e(arra)n(y)-7 b(.)498 1287 y(Scop)r(e:)37 b +Fm(lo)s(cal)498 1386 y Fl(Sp)r(eci\014ed)h(as:)55 b(a)37 +b(rank)g(one)f(arra)n(y)f(of)i(the)h(same)f(t)n(yp)r(e)g(of)g +Fh(X)44 b Fl(and)37 b Fh(Y)56 b Fl(with)38 b(the)498 +1486 y(POINTER)27 b(attribute.)291 1652 y Fm(On)k(Return)291 +1818 y(y)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)h(of)f(result)g +(submatrix)h Fh(Y)18 b Fl(.)498 1918 y(Scop)r(e:)37 b +Fm(lo)s(cal)498 2017 y Fl(T)n(yp)r(e:)g Fm(required)498 +2117 y Fl(Sp)r(eci\014ed)28 b(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h +(arra)n(y)d(of)i(rank)f(one)i(or)e(t)n(w)n(o)h(con)n(taining)f(n)n(um)n +(b)r(ers)h(of)498 2217 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 +b(able)27 b(2.9.)1681 5255 y(25)p eop +%%Page: 26 26 +26 25 bop 739 736 a Fe(F90)p 966 736 41 4 v 48 w(PSSPSM|T)-11 +b(riangular)45 b(System)g(Solv)l(e)739 981 y Fl(This)27 +b(subroutine)h(computes)f(the)h(T)-7 b(riangular)26 b(System)h(Solv)n +(e:)1628 1259 y Fh(Y)101 b Fg( )83 b Fh(\013P)2049 1271 +y Fc(r)2087 1259 y Fh(T)2148 1224 y Fb(\000)p Fd(1)2236 +1259 y Fh(P)2289 1271 y Fc(c)2323 1259 y Fh(X)25 b Fl(+)18 +b Fh(\014)t(Y)1628 1383 y(Y)101 b Fg( )83 b Fh(\013D)r(P)2120 +1395 y Fc(r)2158 1383 y Fh(T)2219 1349 y Fb(\000)p Fd(1)2307 +1383 y Fh(P)2360 1395 y Fc(c)2394 1383 y Fh(X)25 b Fl(+)18 +b Fh(\014)t(Y)1628 1508 y(Y)101 b Fg( )83 b Fh(\013P)2049 +1520 y Fc(r)2087 1508 y Fh(T)2148 1473 y Fb(\000)p Fd(1)2236 +1508 y Fh(P)2289 1520 y Fc(c)2323 1508 y Fh(D)r(X)25 +b Fl(+)18 b Fh(\014)t(Y)1628 1632 y(Y)101 b Fg( )83 b +Fh(\013P)2049 1644 y Fc(r)2087 1632 y Fh(T)2148 1598 +y Fb(\000)p Fc(T)2251 1632 y Fh(P)2304 1644 y Fc(c)2338 +1632 y Fh(X)25 b Fl(+)18 b Fh(\014)t(Y)1628 1757 y(Y)101 +b Fg( )83 b Fh(\013D)r(P)2120 1769 y Fc(r)2158 1757 y +Fh(T)2219 1723 y Fb(\000)p Fc(T)2322 1757 y Fh(P)2375 +1769 y Fc(c)2409 1757 y Fh(X)25 b Fl(+)18 b Fh(\014)t(Y)1628 +1881 y(Y)101 b Fg( )83 b Fh(\013P)2049 1893 y Fc(r)2087 +1881 y Fh(T)2148 1847 y Fb(\000)p Fc(T)2251 1881 y Fh(P)2304 +1893 y Fc(c)2338 1881 y Fh(D)r(X)25 b Fl(+)18 b Fh(\014)t(Y)1628 +2006 y(Y)101 b Fg( )83 b Fh(\013P)2049 2018 y Fc(r)2087 +2006 y Fh(T)2148 1972 y Fb(\000)p Fc(H)2262 2006 y Fh(P)2315 +2018 y Fc(c)2349 2006 y Fh(X)24 b Fl(+)19 b Fh(\014)t(Y)1628 +2130 y(Y)101 b Fg( )83 b Fh(\013D)r(P)2120 2142 y Fc(r)2158 +2130 y Fh(T)2219 2096 y Fb(\000)p Fc(H)2333 2130 y Fh(P)2386 +2142 y Fc(c)2420 2130 y Fh(X)25 b Fl(+)18 b Fh(\014)t(Y)1628 +2255 y(Y)101 b Fg( )83 b Fh(\013P)2049 2267 y Fc(r)2087 +2255 y Fh(T)2148 2221 y Fb(\000)p Fc(H)2262 2255 y Fh(P)2315 +2267 y Fc(c)2349 2255 y Fh(D)r(X)25 b Fl(+)18 b Fh(\014)t(Y)863 +2558 y Fl(where:)739 2721 y Fh(X)48 b Fl(is)27 b(the)h(global)f(dense)g +(submatrix)g Fh(X)2017 2733 y Fd(:)p Fc(;j)s(x)p Fd(:)p +Fc(j)s(x)p Fd(+)p Fc(n)p Fb(\000)p Fd(1)739 2885 y Fh(Y)60 +b Fl(is)28 b(the)g(global)e(dense)h(submatrix)g Fh(Y)1987 +2897 y Fd(:)p Fc(;j)s(y)r Fd(:)p Fc(j)s(y)r Fd(+)p Fc(n)p +Fb(\000)p Fd(1)739 3050 y Fh(T)52 b Fl(is)28 b(the)g(global)e(sparse)h +(blo)r(c)n(k)g(triangular)f(submatrix)h Fh(T)739 3214 +y(D)43 b Fl(is)28 b(the)g(scaling)f(diagonal)f(matrix.)739 +3378 y Fh(P)792 3390 y Fc(r)829 3378 y Fh(;)14 b(P)919 +3390 y Fc(c)994 3378 y Fl(are)27 b(the)h(p)r(erm)n(utation)f(matrices.) +739 3652 y Fe(Syn)l(tax)1169 3834 y Fl(CALL)h(F90)p 1566 +3834 25 4 v 29 w(PSSPSM)f(\()p Ff(alpha,)32 b(t,)e(x,)g(b)l(eta,)g(y,)g +(de)l(c)l(omp)p 2959 3834 26 4 v 32 w(data)p Fl(\))739 +4033 y(CALL)e(F90)p 1136 4033 25 4 v 29 w(PSSPSM)891 +4133 y(\()p Ff(alpha,)33 b(t,)d(x,)f(b)l(eta,)i(y,)f(de)l(c)l(omp)p +1906 4133 26 4 v 32 w(data,)h(tr)l(ans,)e(unit,)h(choic)l(e,)h(diag,)h +(n,)e(jx,)g(jy,)h(work)p Fl(\))p 1381 4355 1581 4 v 1431 +4424 a Fh(T)12 b Fl(,)27 b Fh(X)7 b Fl(,)27 b Fh(Y)18 +b Fl(,)28 b Fh(D)r Fl(,)g Fh(\013)p Fl(,)g Fh(\014)359 +b Fm(Subroutine)p 1381 4458 V 1431 4527 a Fl(Long)26 +b(Precision)g(Real)258 b(F90)p 2560 4527 25 4 v 29 w(PSSPSM)1431 +4627 y(Long)26 b(Precision)g(Complex)100 b(F90)p 2560 +4627 V 29 w(PSSPSM)p 1381 4660 1581 4 v 1754 4892 a(T)-7 +b(able)27 b(2.10:)36 b(Data)27 b(t)n(yp)r(es)2130 5255 +y(26)p eop +%%Page: 27 27 +27 26 bop 291 523 a Fm(On)31 b(En)m(try)291 692 y(alpha)42 +b Fl(the)27 b(scalar)f Fh(\013)p Fl(.)498 792 y(Scop)r(e:)37 +b Fm(global)498 892 y Fl(T)n(yp)r(e:)g Fm(required)498 +991 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h +(the)g(data)f(t)n(yp)r(e)h(indicated)f(in)h(T)-7 b(able)27 +b(2.10.)291 1161 y Fm(t)41 b Fl(the)28 b(global)f(p)r(ortion)g(of)g +(the)h(sparse)f(matrix)g Fh(T)12 b Fl(.)498 1260 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 1360 y Fl(T)n(yp)r(e:)g Fm(required)498 +1460 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)291 1629 +y Fm(x)41 b Fl(the)35 b(lo)r(cal)e(p)r(ortion)h(of)g(global)f(dense)h +(matrix)f Fh(X)7 b Fl(.)56 b(This)34 b(subroutine)g(computes)g(the)498 +1729 y(lo)r(cation)c(of)h(the)g(\014rst)g(elemen)n(t)g(of)f(lo)r(cal)h +(subarra)n(y)d(used,)k(based)e(on)h Fh(j)5 b(x)31 b Fl(and)g(the)498 +1828 y(\014eld)d Fh(matr)r(ix)p 945 1828 25 4 v 30 w(data)g +Fl(of)g Fh(decomp)p 1532 1828 V 29 w(data)g Fl(.)498 +1928 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2027 y Fl(T)n(yp)r(e:)g +Fm(required)498 2127 y Fl(Sp)r(eci\014ed)25 b(as:)34 +b(a)24 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(con)n(taining)i(n)n +(um)n(b)r(ers)h(of)f(t)n(yp)r(e)i(sp)r(eci\014ed)498 +2227 y(in)j(T)-7 b(able)27 b(2.10.)36 b(The)28 b(rank)e(of)i +Fh(x)g Fl(m)n(ust)g(b)r(e)g(the)g(same)f(of)g Fh(y)s +Fl(.)291 2396 y Fm(b)s(eta)41 b Fl(the)28 b(scalar)e +Fh(\014)t Fl(.)498 2496 y(Scop)r(e:)37 b Fm(global)498 +2595 y Fl(T)n(yp)r(e:)g Fm(required)498 2695 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h +(indicated)f(in)h(T)-7 b(able)27 b(2.10.)291 2864 y Fm(y)41 +b Fl(the)36 b(lo)r(cal)e(p)r(ortion)g(of)h(global)e(dense)i(matrix)f +Fh(Y)19 b Fl(.)58 b(This)35 b(subroutine)f(computes)h(the)498 +2964 y(lo)r(cation)c(of)g(the)g(\014rst)g(elemen)n(t)g(of)g(lo)r(cal)g +(subarra)n(y)d(used,)k(based)f(on)g Fh(j)5 b(y)34 b Fl(and)d(the)498 +3064 y(\014eld)d Fh(matr)r(ix)p 945 3064 V 30 w(data)g +Fl(of)g Fh(decomp)p 1532 3064 V 29 w(data)g Fl(.)498 +3163 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3263 y Fl(T)n(yp)r(e:)g +Fm(required)498 3362 y Fl(Sp)r(eci\014ed)25 b(as:)34 +b(a)24 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(con)n(taining)i(n)n +(um)n(b)r(ers)h(of)f(t)n(yp)r(e)i(sp)r(eci\014ed)498 +3462 y(in)j(T)-7 b(able)27 b(2.10.)36 b(The)28 b(rank)e(of)i +Fh(y)i Fl(m)n(ust)e(b)r(e)g(the)g(same)f(of)g Fh(x)p +Fl(.)291 3631 y Fm(decomp)p 618 3631 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)498 +3731 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3831 y Fl(T)n(yp)r(e:)g +Fm(required)498 3930 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 4100 y Fm(trans)42 b Fl(sp)r(ecify)28 +b(with)g Ff(unitd)f Fl(the)h(op)r(eration)f(to)g(p)r(erform.)498 +4269 y Fm(trans)33 b(=)f('N')40 b Fl(the)28 b(op)r(eration)e(is)i(with) +g(no)f(transp)r(osed)g(matrix)498 4403 y Fm(trans)33 +b(=)f('T')41 b Fl(the)28 b(op)r(eration)e(is)i(with)g(transp)r(osed)e +(matrix.)498 4538 y Fm(trans)33 b(=)f('C')40 b Fl(the)28 +b(op)r(eration)f(is)g(with)h(conjugate)f(transp)r(osed)g(matrix.)498 +4707 y(Scop)r(e:)37 b Fm(global)498 4807 y Fl(T)n(yp)r(e:)g +Fm(optional)498 4907 y Fl(Default:)h Fh(tr)r(ans)23 b +Fl(=)g Fh(N)498 5006 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(c)n(haracter)e(v)-5 b(ariable.)1681 5255 y(27)p +eop +%%Page: 28 28 +28 27 bop 739 523 a Fm(unitd)41 b Fl(sp)r(ecify)28 b(with)g +Ff(tr)l(ans)f Fl(the)h(op)r(eration)f(to)g(p)r(erform.)946 +689 y Fm(unitd)32 b(=)g('U')41 b Fl(the)28 b(op)r(eration)f(is)g(with)h +(no)g(scaling)946 822 y Fm(unitd)k(=)g('L')41 b Fl(the)28 +b(op)r(eration)f(is)g(with)h(left)g(scaling)946 955 y +Fm(unitd)k(=)g('R')40 b Fl(the)28 b(op)r(eration)f(is)g(with)h(righ)n +(t)f(scaling.)946 1121 y(Scop)r(e:)37 b Fm(global)946 +1220 y Fl(T)n(yp)r(e:)g Fm(optional)946 1320 y Fl(Default:)h +Fh(unitd)22 b Fl(=)h Fh(U)946 1420 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(c)n(haracter)f(v)-5 b(ariable.)739 +1586 y Fm(c)m(hoice)42 b Fl(sp)r(ecify)21 b(whether)f(a)h(clean)n(up)f +(of)g(the)i(o)n(v)n(erlapp)r(ed)d(elemen)n(ts)h(is)h(required)f(on)g +(exit.)946 1752 y Fm(c)m(hoice)33 b(=)f(.false.)40 b +Fl(no)27 b(clean)n(up)g(on)g(exit)946 1885 y Fm(c)m(hoice)33 +b(=)f(.true.)40 b Fl(clean)n(up)28 b(on)f(exit.)946 2150 +y(Scop)r(e:)37 b Fm(global)946 2250 y Fl(T)n(yp)r(e:)g +Fm(optional)946 2350 y Fl(Default:)h Fh(choice)22 b Fl(=)h +Fh(:tr)r(ue:)946 2449 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(logical)f(v)-5 b(ariable.)739 2615 y Fm(diag)41 +b Fl(the)28 b(diagonal)e(scaling)h(matrix.)946 2715 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 2814 y Fl(T)n(yp)r(e:)g Fm(optional)946 +2914 y Fl(Default:)h Fh(diag)s Fl(\(1\))23 b(=)f(1\()p +Fh(noscal)r(ing)s Fl(\))946 3014 y(Sp)r(eci\014ed)33 +b(as:)44 b(a)31 b(rank)g(one)g(arra)n(y)f(con)n(taining)g(n)n(um)n(b)r +(ers)i(of)f(the)h(t)n(yp)r(e)g(indicated)946 3113 y(in)c(T)-7 +b(able)28 b(2.10.)739 3279 y Fm(n)41 b Fl(n)n(um)n(b)r(er)28 +b(of)f(columns)h(in)g(dense)f(submatrices)g Fh(X)34 b +Fl(and)27 b Fh(Y)19 b Fl(.)946 3379 y(Scop)r(e:)37 b +Fm(global)946 3479 y Fl(T)n(yp)r(e:)g Fm(optional)946 +3578 y Fl(Default:)h Fj(min\(size\(x,2\)-jx)o(+1,)o(si)o(ze\()o(y,)o +(2\))o(-jy)o(+1)o(\))946 3678 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(n)d +Fg(\025)g Fl(0.)739 3844 y Fm(jx)42 b Fl(the)30 b(column)h(index)f(of)g +(global)f(dense)h(matrix)f Fh(X)7 b Fl(,)31 b(iden)n(tifying)f(the)g +(column)g(of)g(sub-)946 3944 y(v)n(ector)d Fh(X)7 b Fl(.)946 +4043 y(Scop)r(e:)37 b Fm(global)946 4143 y Fl(T)n(yp)r(e:)g +Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i +Fh(x)f Fl(is)f(of)h(rank)e(2.)946 4242 y(Default:)38 +b Fh(j)5 b(x)24 b Fl(=)e(1)946 4342 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)f Fl(1.)739 4508 y Fm(jy)42 b Fl(the)31 +b(column)g(index)g(of)g(global)e(dense)i(matrix)f Fh(Y)19 +b Fl(,)32 b(iden)n(tifying)f(the)g(column)g(of)f(sub-)946 +4608 y(v)n(ector)d Fh(Y)18 b Fl(.)946 4707 y(Scop)r(e:)37 +b Fm(global)946 4807 y Fl(T)n(yp)r(e:)g Fm(optional)p +Fl(;)27 b(can)g(only)g(b)r(e)h(presen)n(t)f(if)i Fh(y)h +Fl(is)d(of)h(rank)f(2.)946 4907 y(Default:)38 b Fh(j)5 +b(y)26 b Fl(=)c(1)946 5006 y(Sp)r(eci\014ed)29 b(as:)36 +b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 b(y)26 +b Fg(\025)d Fl(1.)2130 5255 y(28)p eop +%%Page: 29 29 +29 28 bop 498 523 a Fl(Scop)r(e:)37 b Fm(global)498 623 +y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g +(data)f(t)n(yp)r(e)h(indicated)f(in)h(T)-7 b(able)27 +b(2.10.)291 789 y Fm(w)m(ork)42 b Fl(the)28 b(w)n(ork)e(arra)n(y)-7 +b(.)498 888 y(Scop)r(e:)37 b Fm(lo)s(cal)498 988 y Fl(T)n(yp)r(e:)g +Fm(optional)498 1088 y Fl(Sp)r(eci\014ed)28 b(as:)36 +b(a)26 b(rank)g(one)h(arra)n(y)d(of)j(the)g(same)g(t)n(yp)r(e)g(of)g +Fh(X)33 b Fl(with)28 b(the)f(POINTER)498 1187 y(attribute.)291 +1353 y Fm(On)k(Return)291 1519 y(y)41 b Fl(the)36 b(lo)r(cal)e(p)r +(ortion)g(of)h(global)e(dense)i(matrix)f Fh(Y)19 b Fl(.)58 +b(This)35 b(subroutine)f(computes)h(the)498 1619 y(lo)r(cation)c(of)g +(the)g(\014rst)g(elemen)n(t)g(of)g(lo)r(cal)g(subarra)n(y)d(used,)k +(based)f(on)g Fh(j)5 b(y)34 b Fl(and)d(the)498 1719 y(\014eld)d +Fh(matr)r(ix)p 945 1719 25 4 v 30 w(data)g Fl(of)g Fh(decomp)p +1532 1719 V 29 w(data)g Fl(.)498 1818 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 1918 y Fl(T)n(yp)r(e:)g Fm(required)498 +2017 y Fl(Sp)r(eci\014ed)28 b(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h +(arra)n(y)d(of)i(rank)f(one)i(or)e(t)n(w)n(o)h(con)n(taining)f(n)n(um)n +(b)r(ers)h(of)498 2117 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 +b(able)27 b(2.10.)1681 5255 y(29)p eop +%%Page: 30 30 +30 29 bop 739 738 a Fe(F90)p 966 738 41 4 v 48 w(PSHALO|Halo)46 +b(Data)g(Comm)l(unication)739 986 y Fl(These)35 b(subroutines)f +(restore)g(a)h(consisten)n(t)f(status)h(for)g(the)g(halo)g(elemen)n +(ts,)i(and)e(\(op-)739 1086 y(tionally\))27 b(scale)g(the)h(result:) +2004 1289 y Fh(X)i Fg( )23 b Fh(\013X)739 1471 y Fl(where:)739 +1637 y Fh(X)48 b Fl(is)27 b(a)g(global)g(dense)g(submatrix.)p +1373 1821 1597 4 v 1423 1891 a Fh(\013)p Fl(,)h Fh(X)813 +b Fm(Subroutine)p 1373 1924 V 1423 1994 a Fl(In)n(teger)731 +b(F90)p 2552 1994 25 4 v 29 w(PSHALO)1423 2094 y(Long)26 +b(Precision)g(Real)258 b(F90)p 2552 2094 V 29 w(PSHALO)1423 +2193 y(Long)26 b(Precision)g(Complex)100 b(F90)p 2552 +2193 V 29 w(PSHALO)p 1373 2227 1597 4 v 1754 2459 a(T)-7 +b(able)27 b(2.11:)36 b(Data)27 b(t)n(yp)r(es)739 2832 +y Fe(Syn)l(tax)1472 3014 y Fl(CALL)g(F90)p 1868 3014 +25 4 v 29 w(PSHALO)h(\()p Ff(x,)i(de)l(c)l(omp)p 2657 +3014 26 4 v 31 w(data)p Fl(\))1236 3214 y(CALL)d(F90)p +1632 3214 25 4 v 29 w(PSHALO)h(\()p Ff(x,)i(de)l(c)l(omp)p +2421 3214 26 4 v 31 w(data,)h(alpha,)h(work)p Fl(\))739 +3413 y Fm(On)f(En)m(try)739 3579 y(x)42 b Fl(global)26 +b(dense)i(matrix)f Fh(X)7 b Fl(.)946 3678 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 3778 y Fl(T)n(yp)r(e:)g Fm(required)946 +3878 y Fl(Sp)r(eci\014ed)25 b(as:)34 b(a)24 b(rank)f(one)h(or)f(t)n(w)n +(o)g(arra)n(y)e(con)n(taining)i(n)n(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g +(sp)r(eci\014ed)946 3977 y(in)k(T)-7 b(able)28 b(2.11.)739 +4143 y Fm(decomp)p 1066 4143 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 4243 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 4342 y Fl(T)n(yp)r(e:)g +Fm(required)946 4442 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 4608 y Fm(alpha)42 b Fl(the)28 b(scalar)e +Fh(\013)p Fl(.)946 4707 y(Scop)r(e:)37 b Fm(global)946 +4807 y Fl(T)n(yp)r(e:)g Fm(optional)946 4907 y Fl(Default:)h +Fh(al)r(pha)22 b Fl(=)h(1)946 5006 y(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(n)n(um)n(b)r(er)g(of)h(the)g(data)f(t)n(yp)r(e)h +(indicated)g(in)f(T)-7 b(able)28 b(2.11.)2130 5255 y(30)p +eop +%%Page: 31 31 +31 30 bop 291 523 a Fm(w)m(ork)42 b Fl(the)28 b(w)n(ork)e(arra)n(y)-7 +b(.)498 623 y(Scop)r(e:)37 b Fm(lo)s(cal)498 722 y Fl(T)n(yp)r(e:)g +Fm(optional)498 822 y Fl(Sp)r(eci\014ed)28 b(as:)36 b(a)26 +b(rank)g(one)h(arra)n(y)d(of)j(the)g(same)g(t)n(yp)r(e)g(of)g +Fh(X)33 b Fl(with)28 b(the)f(POINTER)498 922 y(attribute.)291 +1088 y Fm(On)k(Return)291 1254 y(x)41 b Fl(global)27 +b(dense)g(result)h(matrix)f Fh(X)7 b Fl(.)498 1353 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 1453 y Fl(T)n(yp)r(e:)g Fm(required)498 +1553 y Fl(Returned)23 b(as:)33 b(a)22 b(rank)g(one)g(or)f(t)n(w)n(o)h +(arra)n(y)e(con)n(taining)i(n)n(um)n(b)r(ers)g(of)g(t)n(yp)r(e)h(sp)r +(eci\014ed)498 1652 y(in)28 b(T)-7 b(able)27 b(2.11.)1681 +5255 y(31)p eop +%%Page: 32 32 +32 31 bop 739 740 a Fe(F90)p 966 740 41 4 v 48 w(PSO)l(VRL|Ov)l(erlap) +46 b(Up)t(date)739 990 y Fl(These)27 b(subroutines)g(restore)f(a)h +(consisten)n(t)g(status)h(for)f(the)h(o)n(v)n(erlap)d(elemen)n(ts:)1998 +1190 y Fh(X)30 b Fg( )23 b Fh(QX)739 1373 y Fl(where:)739 +1540 y Fh(X)48 b Fl(is)27 b(the)h(global)f(dense)g(submatrix)g +Fh(X)739 1708 y(Q)41 b Fl(is)28 b(the)g(o)n(v)n(erlap)d(op)r(erator;)h +(it)i(is)f(the)h(comp)r(osition)f(of)h(t)n(w)n(o)f(op)r(erators)e +Fh(P)3164 1720 y Fc(a)3233 1708 y Fl(and)i Fh(P)3459 +1678 y Fc(T)3511 1708 y Fl(.)p 1375 1895 1593 4 v 1424 +1964 a Fh(X)917 b Fm(Subroutine)p 1375 1998 V 1424 2067 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2553 2067 +25 4 v 29 w(PSO)n(VRL)1424 2167 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2553 2167 V 29 w(PSO)n(VRL)p 1375 2200 1593 4 +v 1754 2432 a(T)-7 b(able)27 b(2.12:)36 b(Data)27 b(t)n(yp)r(es)739 +2809 y Fe(Syn)l(tax)1473 2991 y Fl(CALL)h(F90)p 1870 +2991 25 4 v 29 w(PSO)n(VRL)f(\()p Ff(x,)j(de)l(c)l(omp)p +2655 2991 26 4 v 32 w(data)p Fl(\))1543 3190 y(CALL)d(F90)p +1939 3190 25 4 v 29 w(PSO)n(VRL)g(\()p Ff(x,)j(de)l(c)l(omp)p +2724 3190 26 4 v 32 w(data,)h(CHOICE=choic)l(e,)1924 +3290 y(UPD)n(A)-6 b(TE)p 2279 3290 V 31 w(TYPE=up)l(date)p +2840 3290 V 31 w(typ)l(e,)30 b(WORK=work)p Fl(\))739 +3573 y Fm(On)h(En)m(try)739 3741 y(x)42 b Fl(global)26 +b(dense)i(matrix)f Fh(X)7 b Fl(.)946 3840 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 3940 y Fl(T)n(yp)r(e:)g Fm(required)946 +4040 y Fl(Sp)r(eci\014ed)25 b(as:)34 b(a)24 b(rank)f(one)h(or)f(t)n(w)n +(o)g(arra)n(y)e(con)n(taining)i(n)n(um)n(b)r(ers)h(of)g(t)n(yp)r(e)g +(sp)r(eci\014ed)946 4139 y(in)k(T)-7 b(able)28 b(2.12.)739 +4307 y Fm(decomp)p 1066 4307 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 4406 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 4506 y Fl(T)n(yp)r(e:)g +Fm(required)946 4606 y Fl(Sp)r(eci\014ed)31 b(as:)40 +b(a)29 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)43 b(item[c)n(hoice])29 b(sp)r(ecify)946 +4705 y(if)g(exc)n(hange)d(o)n(v)n(erlap)f(elemen)n(ts.)946 +4873 y Fm(c)m(hoice)33 b(=)f(.true.)40 b Fl(exc)n(hange)27 +b(o)n(v)n(erlap)e(elemen)n(ts,)j(i.e.)37 b(apply)27 b(op)r(erator)f +Fh(P)3383 4843 y Fc(T)3435 4873 y Fl(;)946 5006 y Fm(c)m(hoice)33 +b(=)f(.false.)40 b Fl(don't)27 b(exc)n(hange)g(o)n(v)n(erlap)e(elemen)n +(ts)2130 5255 y(32)p eop +%%Page: 33 33 +33 32 bop 498 523 a Fl(Scop)r(e:)37 b Fm(global)498 623 +y Fl(T)n(yp)r(e:)g Fm(optional)498 722 y Fl(Default:)h +Fh(choice)22 b Fl(=)h Fh(:tr)r(ue:)498 822 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(logical)f(v)-5 b(ariable.)498 988 y +Fm(up)s(date)p 793 988 29 4 v 35 w(t)m(yp)s(e)32 b(=)g(1)41 +b Fl(normal)27 b(up)r(date)h Fh(P)1847 1000 y Fc(a)1887 +988 y Fl(;)498 1121 y Fm(up)s(date)p 793 1121 V 35 w(t)m(yp)s(e)k(=)g +(2)41 b Fl(square)26 b(ro)r(ot)h(up)r(date)1951 1057 +y Fg(p)p 2020 1057 94 4 v 64 x Fh(P)2073 1133 y Fc(a)2114 +1121 y Fl(;)498 1287 y(Scop)r(e:)37 b Fm(global)498 1386 +y Fl(Default:)h Fh(update)p 1078 1386 25 4 v 29 w(ty)s(pe)22 +b Fl(=)g Fh(:tr)r(ue:)498 1486 y Fl(Scop)r(e:)37 b Fm(global)498 +1586 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(in)n(teger)f(v)-5 +b(ariable.)291 1752 y Fm(w)m(ork)42 b Fl(the)28 b(w)n(ork)e(arra)n(y)-7 +b(.)498 1851 y(Scop)r(e:)37 b Fm(lo)s(cal)498 1951 y +Fl(T)n(yp)r(e:)g Fm(optional)498 2051 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(one)g(dimensional)g(arra)n(y)e(of)j(the)g(same)f(t)n +(yp)r(e)h(of)f Fh(X)7 b Fl(.)291 2217 y Fm(On)31 b(Return)291 +2383 y(x)41 b Fl(global)27 b(dense)g(result)h(matrix)f +Fh(X)7 b Fl(.)498 2482 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +2582 y Fl(T)n(yp)r(e:)g Fm(required)498 2682 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(p)r(oin)n(ter)g(to)h(arra)n(y)d(of)i(rank)f(one)i(or) +e(t)n(w)n(o)h(con)n(taining)f(n)n(um)n(b)r(ers)h(of)498 +2781 y(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(T)-7 b(able)27 +b(2.12.)291 3056 y Fe(Usage)46 b(notes)392 3238 y Fl(1.)41 +b(If)21 b(there)g(is)f(no)h(o)n(v)n(erlap)d(in)j(the)g(data)g +(distribution,)h(no)e(op)r(erations)g(are)f(p)r(erformed;)392 +3404 y(2.)41 b(The)35 b(op)r(erator)f Fh(P)1084 3374 +y Fc(T)1171 3404 y Fl(p)r(erforms)h(the)g(reduction)g(sum)g(of)g(o)n(v) +n(erlap)f(elemen)n(ts;)k(it)e(is)498 3503 y(the)k(in)n(v)n(erse)e(of)h +(a)g(\\stretc)n(h")e(op)r(erator)h Fh(P)51 b Fl(that)40 +b(replicates)e(o)n(v)n(erlap)f(elemen)n(ts,)498 3603 +y(accoun)n(ting)27 b(for)g(the)h(ph)n(ysical)e(replication)h(of)h +(data;)392 3769 y(3.)41 b(The)21 b(op)r(erator)f Fh(P)1044 +3781 y Fc(a)1105 3769 y Fl(p)r(erforms)h(a)g(scaling)f(on)g(the)i(o)n +(v)n(erlap)d(elemen)n(ts)i(b)n(y)g(the)g(amoun)n(t)498 +3869 y(of)j(replication;)g(th)n(us,)h(when)f(com)n(bined)g(with)h(the)f +(reduction)f(op)r(erator,)g(it)i(imple-)498 3968 y(men)n(ts)j(the)g(a)n +(v)n(erage)c(of)k(replicated)f(elemen)n(ts)h(o)n(v)n(er)d(all)j(of)f +(their)h(instances.)392 4134 y(4.)41 b(The)c(square)f(ro)r(ot)h(up)r +(date)h(option)f(mak)n(es)f(it)i(p)r(ossible)f(to)g(applythe)g(follo)n +(wing)498 4234 y(op)r(erator:)1336 4334 y Fh(X)30 b Fg( )1541 +4257 y Fa(p)p 1624 4257 94 4 v 77 x Fh(P)1677 4346 y +Fc(a)1717 4334 y Fh(P)1782 4299 y Fc(T)1834 4334 y Fh(K)1911 +4299 y Fb(\000)p Fd(1)2000 4334 y Fh(P)2065 4257 y Fa(p)p +2148 4257 V 77 x Fh(P)2201 4346 y Fc(a)2241 4334 y Fh(X)498 +4483 y Fl(In)e(the)h(case)e(of)h(a)g(symmetric)g Fh(K)6 +b Fl(,)28 b(this)g(preserv)n(es)e(simmetry)i(of)g(the)h(o)n(v)n(erall)d +(pre-)498 4583 y(conditioner,)h(whic)n(h)h(w)n(ould)f(otherwise)f(b)r +(e)i(destro)n(y)n(ed.)1681 5255 y(33)p eop +%%Page: 34 34 +34 33 bop 739 1146 a Fk(Chapter)65 b(3)739 1561 y Fn(T)-19 +b(o)6 b(ols)77 b(Library)739 1993 y Fl(Routines)34 b(in)h(this)f(c)n +(hapter)f(pro)n(vide)g(a)h(con)n(v)n(enien)n(t)f(in)n(terface)h(to)g +(the)h(k)n(ernels)e(for)g(ini-)739 2092 y(tialize)25 +b(and)g(set)g(comm)n(unication)f(data)h(structures,)g(sparse)e(matrix)i +(and)g(dense)g(matrix;)739 2192 y(they)31 b(hide)g(most)f(of)g(the)h +(details)g(dep)r(ending)g(on)f(the)h(parallel)e(programming)g(en)n +(viron-)739 2291 y(men)n(t.)37 b(Their)27 b(in)n(terfaces)f(are)h +(de\014ned)g(in)h(the)g(mo)r(dule)f Fj(F90TOOLS)d Fl(T)-7 +b(o)27 b(use)g(the)h(TOOLS)739 2391 y(subroutines,)21 +b(the)f(user)f(m)n(ust)h(de\014ne)g(a)f(partition)g(subroutine)h(that)g +(sp)r(eci\014es)f(ho)n(w)g(global)739 2491 y(matrix)27 +b(ro)n(ws)f(are)h(assigned)f(o)n(v)n(er)g(all)h(pro)r(cesses.)2130 +5255 y(34)p eop +%%Page: 35 35 +35 34 bop 291 746 a Fe(P)-11 b(AR)g(TITION|User)43 b(de\014ned)f +(partition)j(subroutine)291 1000 y Fl(De\014nes)35 b(ho)n(w)f(global)g +(matrix)g(ro)n(ws)g(are)g(assigned)f(o)n(v)n(er)g(all)i(pro)r(cesses.) +57 b(This)35 b(routine)291 1100 y(m)n(ust)j(b)r(e)g(implemen)n(ted)h(b) +n(y)e(user)h(and)f(m)n(ust)h(b)r(e)h(passed)e(as)g(argumen)n(t)g(to)h +(all)g(T)-7 b(o)r(ols)291 1200 y(library)26 b(subroutine)h(that)h +(require)e(it.)291 1299 y(It)i(m)n(ust)f(satisfy)g(the)h(follo)n(wing)f +(sp)r(eci\014cation:)291 1472 y Fj(interface)421 1572 +y(subroutine)40 b(partition\(glob_i)o(nd)o(ex,)o(nr)o(ow)o(,np)o(,p)o +(v,n)o(v\))509 1671 y(integer)84 b(glob_index,np,nr)o(ow)o(,nv)509 +1771 y(integer)g(pv\(*\))421 1870 y(end)43 b(subroutine)c(partition)291 +1970 y(end)j(interface)291 2159 y Fm(On)31 b(En)m(try)291 +2332 y(glob)p 473 2332 29 4 v 33 w(index)41 b Fl(global)27 +b(matrix)g(ro)n(w)f(index)498 2432 y(Sp)r(eci\014ed)c(as:)33 +b(an)22 b(in)n(teger)e(v)-5 b(ariable)21 b(0)h Fg(\024)h +Fh(g)s(l)r(ob)p 1958 2432 25 4 v 28 w(index)g Fg(\024)g +Fh(N)9 b(o)p 2421 2432 V 30 w(r)r(ow)r Fl(,)24 b(where)d +Fh(N)9 b(o)p 2988 2432 V 30 w(r)r(ow)498 2531 y Fl(si)28 +b(the)g(total)f(n)n(um)n(b)r(er)g(of)h(global)e(matrix)h(ro)n(ws.)291 +2704 y Fm(pro)s(cs)41 b Fl(F)-7 b(or)27 b(return)g(v)-5 +b(alues)27 b(see)g(\\On)g(Return".)498 2804 y(Sp)r(eci\014ed)h(as:)37 +b(in)n(teger)26 b(arra)n(y)f(of)j(length)g Fh(l)h Fl(with)f +Fh(l)c Fg(\025)f Fh(N)2268 2816 y Fc(p)2306 2804 y Fl(.)291 +3076 y Fm(npro)s(cs)41 b Fl(see)27 b(\\On)g(Return".)291 +3249 y Fm(On)k(Return)291 3422 y(pro)s(cs)41 b Fl(con)n(tains)27 +b(pro)r(cess)f(iden)n(ti\014er)h(to)h(whic)n(h)f(is)h(assigned)e +Fh(g)s(l)r(ob)p 2420 3422 V 29 w(r)r(ow)31 b Fl(matrix)c(ro)n(w.)498 +3521 y(Sp)r(eci\014ed)h(as:)37 b(in)n(teger)26 b(arra)n(y)f(of)j +(length)g Fh(l)h Fl(with)f Fh(l)c Fg(\025)f Fh(N)2268 +3533 y Fc(p)2306 3521 y Fl(.)291 3794 y Fm(npro)s(cs)41 +b Fl(No)27 b(of)h(pro)r(cess)e(to)i(whic)n(h)f(is)h(assigned)e +Fh(g)s(l)r(ob)p 2025 3794 V 29 w(r)r(ow)31 b Fl(matrix)c(ro)n(w.)498 +3893 y(Sp)r(eci\014ed)h(as:)37 b(in)n(teger)26 b(1)d +Fg(\024)f Fh(pr)r(ocs)i Fg(\024)f Fh(N)1782 3905 y Fc(p)1820 +3893 y Fl(.)291 4177 y Fe(Usage)46 b(Notes)392 4362 y +Fl(1.)41 b(Duplicate)28 b(elemen)n(ts)g(are)e(not)i(admitted)g(on)f +(return)g(arra)n(y)f Fh(pr)r(ocs)392 4535 y Fl(2.)41 +b(If)20 b(this)f(subroutine)f(returns)h(for)f(a)h(ro)n(w)f(matrix)g +Fh(r)2035 4547 y Fc(m)2118 4535 y Fl(a)g(n)n(um)n(b)r(er)h(of)g(pro)r +(cesses)e Fh(npr)r(ocs)498 4634 y Fl(greater)25 b(than)i(1,)g(then)g +Fh(r)1289 4646 y Fc(m)1380 4634 y Fl(will)g(b)r(e)h(an)e(o)n(v)n(erlap) +f(ro)n(w)h(b)r(et)n(w)n(een)h(all)f(pro)r(cesses)g(that)498 +4734 y(o)n(wn)h(it.)392 4907 y(3.)41 b(An)21 b(example)e(of)h +(partition)f(subroutine)h(whic)n(h)g(sub)r(divide)g(sparse)f(matrix)g +(in)h(blo)r(c)n(k)498 5006 y(ro)n(ws)26 b(with)i(an)g(one)f(ro)n(w)f(o) +n(v)n(erlap)g(of)h(is:)1681 5255 y(35)p eop +%%Page: 36 36 +36 35 bop 946 523 a Fe(Example)46 b(1)1208 738 y Fj(SUBROUTINE)39 +b(PART_BLOCK\(GLOB_I)o(NDX)o(,P)o(RO)o(CS,)o(NP)o(ROC)o(S\))1208 +838 y(INTEGER)259 b(GLOB_INDX)1208 937 y(INTEGER)g(PROCS\(*\),NPROCS) +946 1236 y(C)218 b(...Common)40 b(area)i(previously)d(initialized)g +(...)1208 1336 y(COMMON)i(/BLOCK_DATA/)e(DIM_BLOCK,)g(NP)1208 +1435 y(INTEGER)i(DIM_BLOCK,)e(NP)1208 1535 y(SAVE)85 +b(/BLOCK_DATA/)946 1635 y(C)218 b(..DIM_BLOCK..is)37 +b(dimension)j(\(number)h(of)i(rows\))e(of)i(each)f(matrix)f(block)946 +1734 y(C)218 b(..NP.........is)37 b(number)k(of)i(processes)d(involved) +g(in)j(computation)1208 1934 y(NPROCS=1)1208 2033 y(PROCS\(1\))d(=)j +(\(GLOB_INDX-1\)/DIM)o(_BL)o(OC)o(K)1208 2133 y(IF)g +(\(\(MOD\(GLOB_INDX,)37 b(DIM_BLOCK\).EQ.0)o(\).A)o(ND)o(.)1164 +2232 y(+)44 b(\(PROCS\(NPROCS\).)o(NE.)o(\(N)o(P-)o(1\)\))o(\))37 +b(THEN)946 2332 y(C)305 b(....this)40 b(is)j(an)g(overlap)e(row....) +1469 2432 y(NPROCS)h(=)h(NPROCS+1)1469 2531 y(PROCS\(NPROCS\))c(=)k +(PROCS\(NPROCS-1\)+)o(1)1208 2631 y(ENDIF)1208 2731 y(RETURN)1208 +2930 y(BLOCK)e(DATA)h(BLOCK)1208 3029 y(COMMON)f(/BLOCK_DATA/)e +(DIM_BLOCK,)g(NP)1208 3129 y(INTEGER)i(DIM_BLOCK,)e(NP)1208 +3229 y(SAVE)85 b(/BLOCK_DATA/)2130 5255 y Fl(36)p eop +%%Page: 37 37 +37 36 bop 291 711 a Fe(F90)p 518 711 41 4 v 48 w(PSDSCALL)44 +b(|Allo)t(cate)i(Comm)l(unication)291 861 y(Descriptor)291 +1081 y Fl(This)25 b(subroutine)g(allo)r(cates)g(the)h(comm)n(unication) +f(desciptor)g(for)g(global)f(sparse)g(matrix)291 1181 +y Fh(A)p Fl(.)291 1450 y Fe(Syn)l(tax)577 1631 y Fl(CALL)j(F90)p +973 1631 25 4 v 29 w(PSDSCALL)h(\()p Ff(m,n,p)l(arts,ic)l(ontxt,)j +(ierrv,)g(de)l(c)l(omp)p 2655 1631 26 4 v 32 w(data)p +Fl(\))291 1831 y Fm(On)g(En)m(try)291 1983 y(m)40 b Fl(n)n(um)n(b)r(er) +27 b(of)h(ro)n(ws)e(of)h(global)g(sparse)f(submatrix)h +Fh(A)h Fl(to)f(allo)r(cate.)498 2083 y(Scop)r(e:)37 b +Fm(global)498 2182 y Fl(T)n(yp)r(e:)g Fm(required)498 +2282 y Fl(Sp)r(eci\014ed)20 b(as:)32 b(an)19 b(in)n(teger)f(v)-5 +b(ariable)18 b(0)23 b Fg(\024)f Fh(m)h Fg(\024)g Fh(decomp)p +2258 2282 25 4 v 30 w(data)p Fl(\045)p Fh(matr)r(ix)p +2780 2282 V 30 w(data)p Fl([)p Fh(M)p 3084 2282 V 39 +w Fl(].)291 2434 y Fm(n)41 b Fl(n)n(um)n(b)r(er)27 b(of)h(columns)f(of) +h(global)e(sparse)h(submatrix)g Fh(X)34 b Fl(to)27 b(allo)r(cate.)498 +2534 y(Scop)r(e:)37 b Fm(global)498 2633 y Fl(T)n(yp)r(e:)g +Fm(required)498 2733 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(n)d Fg(\025)g +Fl(0.)291 2885 y Fm(parts)42 b Fl(function)24 b(that)h(sp)r(ecify)f +(the)g(partition)g(of)g(global)f(matrix)g(ro)n(ws)g(o)n(v)n(er)f(all)i +(pro)r(cess.)498 2985 y(See)k(...)37 b(for)27 b(more)g(informations.) +498 3084 y(Scop)r(e:)37 b Fm(global)498 3184 y Fl(T)n(yp)r(e:)g +Fm(required)498 3284 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(pro)r(cedure.)291 3436 y Fm(icon)m(txt)42 b +Fl(BLA)n(CS)27 b(comm)n(unication)g(con)n(text.)498 3536 +y(Scop)r(e:)37 b Fm(global)498 3635 y Fl(T)n(yp)r(e:)g +Fm(required)498 3735 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(an)27 b(in)n(teger)g(v)-5 b(ariable.)291 3887 y Fm(ierrv)41 +b Fl(see)28 b(\\On)e(Return".)291 4039 y Fm(On)31 b(Return)291 +4192 y(ierrv)41 b Fl(error)26 b(v)n(ector)g(used)i(in)g(subroutines)f +(F90)p 1858 4192 V 29 w(PSSPINS)g(and)h(F90)p 2558 4192 +V 29 w(PSSP)-7 b(ASB.)498 4291 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4391 y Fl(T)n(yp)r(e:)g Fm(required)498 4490 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(one)g(dimensional)g(arra)n(y)e(of)j(in)n(teger.)291 +4643 y Fm(decomp)p 618 4643 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)498 4742 +y(Scop)r(e:)37 b Fm(lo)s(cal)498 4842 y Fl(T)n(yp)r(e:)g +Fm(required)498 4942 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)1681 5255 y(37)p eop +%%Page: 38 38 +38 37 bop 739 708 a Fe(F90)p 966 708 41 4 v 48 w(PSSP)-11 +b(ALL)43 b(|Allo)t(cate)k(Global)e(Sparse)g(Matrix)739 +925 y Fl(This)25 b(subroutine)f(allo)r(cates)g(global)f(sparse)g +(matrix)i Fh(A)p Fl(,)g(the)g(storage)e(mo)r(de)i(of)g(matrix)f +Fh(A)739 1024 y Fl(is)j(decided)h(b)n(y)f(user)g(setting)h(its)g(FID)n +(A)g(and)f(DESCRA)i(\014elds.)p 1363 1128 1617 4 v 1412 +1198 a Fh(A)924 b Fm(Subroutine)p 1363 1231 V 1412 1301 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2541 1301 +25 4 v 29 w(PSSP)-7 b(ALL)1412 1400 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2541 1400 V 29 w(PSSP)-7 b(ALL)p 1363 1433 1617 +4 v 1775 1665 a(T)g(able)27 b(3.1:)36 b(Data)27 b(t)n(yp)r(es)739 +2017 y Fe(Syn)l(tax)1346 2199 y Fl(CALL)g(F90)p 1742 +2199 25 4 v 30 w(PSSP)-7 b(ALL)27 b(\()p Ff(a,)j(ierrv,)h(de)l(c)l(omp) +p 2782 2199 26 4 v 32 w(data)p Fl(\))1255 2398 y(CALL)c(F90)p +1651 2398 25 4 v 29 w(PSSP)-7 b(ALL)27 b(\()p Ff(a,)k(ierrv,)g(de)l(c)l +(omp)p 2691 2398 26 4 v 32 w(data,)g(nnz)p Fl(\))739 +2598 y Fm(On)g(En)m(try)739 2748 y(a)42 b Fl(see)27 b(\\On)g(Return".) +739 2899 y Fm(decomp)p 1066 2899 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)946 +2998 y(Scop)r(e:)37 b Fm(lo)s(cal)946 3098 y Fl(T)n(yp)r(e:)g +Fm(required)946 3197 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 3348 y Fm(ierrv)42 b Fl(see)27 b(\\On)g(Return".)739 +3498 y Fm(nnz)42 b Fl(An)28 b(estimate)g(of)f(the)h(n)n(um)n(b)r(er)f +(of)h(non-zero)e(v)-5 b(alues)27 b(of)h(lo)r(cal)f(sparse)f(matrix.)946 +3598 y(Scop)r(e:)37 b Fm(lo)s(cal)946 3698 y Fl(T)n(yp)r(e:)g +Fm(optional)946 3797 y Fl(Default:)h Fh(nnz)26 b Fl(=)d(9)18 +b Fg(\003)g Fh(decomp)p 1926 3797 25 4 v 29 w(data)p +Fl(\045)p Fh(matr)r(ix)p 2447 3797 V 31 w(data)p Fl([)p +Fh(n)p 2712 3797 V 30 w(r)r(ow)r Fl(].)946 3897 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable.)739 4047 +y Fm(On)31 b(Return)739 4198 y(a)42 b Fl(allo)r(cated)27 +b(lo)r(cal)g(p)r(ortion)g(of)g(global)g(sparse)f(matrix)h +Fh(A)p Fl(.)946 4297 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4397 y Fl(T)n(yp)r(e:)g Fm(required)946 4497 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4647 y Fm(ierrv)42 b Fl(error)25 b(v)n(ector)i(used) +g(in)h(subroutines)f(F90)p 2306 4647 V 29 w(PSSPINS)g(and)h(F90)p +3006 4647 V 29 w(PSSP)-7 b(ASB.)946 4747 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 4846 y Fl(T)n(yp)r(e:)g Fm(required)946 +4946 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(one)g(dimensional)g(arra) +n(y)f(of)h(in)n(teger.)2130 5255 y(38)p eop +%%Page: 39 39 +39 38 bop 291 745 a Fe(F90)p 518 745 41 4 v 48 w(PSSPINS|Insert)45 +b(Sparse)g(Matrix)291 999 y Fl(This)22 b(subroutine)h(inserts)f(a)h(lo) +r(cal)f(sparse)g(matrix)g Fh(B)t(LC)6 b(K)29 b Fl(in)23 +b(the)g(global)f(sparse)f(matrix)291 1098 y Fh(A)p Fl(.)p +925 1204 1597 4 v 974 1274 a Fh(A;)14 b(B)t(LC)6 b(K)627 +b Fm(Subroutine)p 925 1307 V 974 1377 a Fl(Long)27 b(Precision)f(Real) +257 b(F90)p 2103 1377 25 4 v 29 w(PSSPINS)974 1477 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2103 1477 V 29 w(PSSPINS)p +925 1510 1597 4 v 1326 1742 a(T)-7 b(able)28 b(3.2:)36 +b(Data)27 b(t)n(yp)r(es)291 2141 y Fe(Syn)l(tax)689 2324 +y Fl(CALL)h(F90)p 1086 2324 25 4 v 29 w(PSSPINS)f(\()p +Ff(a,)k(ia,)g(ja,)g(blck,)g(ierrv,)g(de)l(c)l(omp)p 2542 +2324 26 4 v 32 w(data)p Fl(\))291 2524 y(CALL)c(F90)p +687 2524 25 4 v 29 w(PSSPINS)777 2623 y(\()p Ff(a,)k(ia,)g(ja,)g(blck,) +g(ierrv,)g(de)l(c)l(omp)p 1838 2623 26 4 v 31 w(data,)g(IBLCK=iblck,)h +(JBLCK=jblck)p Fl(\))291 2824 y Fm(On)f(En)m(try)291 +2996 y(a)41 b Fl(the)34 b(lo)r(cal)f(p)r(ortion)g(of)h(global)e(sparse) +g(matrix)h Fh(A)p Fl(.)56 b(This)33 b(subroutine)g(computes)h(the)498 +3096 y(lo)r(cation)j(of)h(the)f(\014rst)h(elemen)n(t)f(of)h(the)g(lo)r +(cal)f(submatrix)g(to)g(insert,)j(based)d(on)498 3195 +y Fh(ia;)14 b(j)5 b(a)27 b Fl(and)g Fh(matr)r(ix)p 1146 +3195 25 4 v 31 w(data)h Fl(\014eld)f(of)h Fh(decomp)p +1913 3195 V 30 w(data)p Fl(.)498 3295 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 3395 y Fl(T)n(yp)r(e:)g Fm(required)498 +3494 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)291 3666 +y Fm(ia)41 b Fl(the)f(ro)n(w)e(index)h(of)g(global)g(sparse)f(matrix)g +Fh(A)p Fl(,)43 b(iden)n(tifying)c(the)h(\014rst)f(ro)n(w)f(of)i(the)498 +3766 y(submatrix)27 b Fh(A)p Fl(.)498 3865 y(Scop)r(e:)37 +b Fm(global)498 3965 y Fl(T)n(yp)r(e:)g Fm(required)498 +4065 y Fl(Sp)r(eci\014ed)20 b(as:)32 b(an)19 b(in)n(teger)f(v)-5 +b(ariable)19 b(1)j Fg(\024)h Fh(ia)f Fg(\024)h Fh(decomp)p +2258 4065 V 30 w(data)p Fl(\045)p Fh(matr)r(ix)p 2780 +4065 V 30 w(data)p Fl([)p Fh(M)p 3084 4065 V 39 w Fl(].)291 +4237 y Fm(ja)42 b Fl(the)26 b(column)h(index)f(of)g(the)h(global)e +(sparse)g(matrix)h Fh(A)p Fl(,)h(iden)n(tifying)f(the)h(\014rst)f +(column)498 4336 y(of)i(the)g(submatrix)f Fh(A)p Fl(.)498 +4436 y(Scop)r(e:)37 b Fm(global)498 4535 y Fl(T)n(yp)r(e:)g +Fm(required)498 4635 y Fl(Sp)r(eci\014ed)21 b(as:)32 +b(an)20 b(in)n(teger)f(v)-5 b(ariable)19 b(1)k Fg(\024)f +Fh(j)5 b(a)23 b Fg(\024)g Fh(decomp)p 2272 4635 V 29 +w(data)p Fl(\045)p Fh(matr)r(ix)p 2793 4635 V 31 w(data)p +Fl([)p Fh(N)p 3084 4635 V 39 w Fl(].)291 4807 y Fm(blc)m(k)42 +b Fl(the)28 b(lo)r(cal)f(submatrix)g(to)g(insert.)498 +4907 y(Scop)r(e:)37 b Fm(lo)s(cal)498 5006 y Fl(T)n(yp)r(e:)g +Fm(required)1681 5255 y Fl(39)p eop +%%Page: 40 40 +40 39 bop 946 523 a Fl(Sp)r(eci\014ed)36 b(as:)50 b(a)35 +b(structured)f(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)g Fl(1;)j(supp)r(orted)c(storage)946 623 y(formats)27 +b(are)g Fj(CSR)f Fl(and)i Fj(COO)p Fl(..)739 810 y Fm(decomp)p +1066 810 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 910 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +1009 y Fl(T)n(yp)r(e:)g Fm(required)946 1109 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 1296 y Fm(ierrv)42 b Fl(error)25 b(v)n(ector.)36 +b(It)28 b(m)n(ust)g(b)r(e)g(initialized)f(b)n(y)h(F90)p +2515 1296 25 4 v 29 w(PSSP)-7 b(ALL.)946 1396 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 1495 y Fl(T)n(yp)r(e:)g Fm(required)946 +1595 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(one)g(dimensional)g(arra) +n(y)f(of)h(in)n(teger.)739 1782 y Fm(iblc)m(k)42 b Fl(the)29 +b(ro)n(w)f(index)i(of)f(lo)r(cal)f(sparse)g(matrix)h +Fh(B)t(LC)6 b(K)g Fl(,)29 b(iden)n(tifying)h(the)f(\014rst)g(ro)n(w)f +(of)946 1882 y(the)g(submatrix)f Fh(B)t(LC)6 b(K)g Fl(.)946 +1981 y(Scop)r(e:)37 b Fm(lo)s(cal)946 2081 y Fl(T)n(yp)r(e:)g +Fm(optional)946 2181 y Fl(Default:)h Fh(ibl)r(ck)25 b +Fl(=)d(1.)946 2280 y(Sp)r(eci\014ed)29 b(as:)36 b(an)27 +b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 b(bl)r(ck)25 +b Fg(\025)e Fl(1.)739 2468 y Fm(jblc)m(k)42 b Fl(the)28 +b(column)f(index)h(of)f(the)h(lo)r(cal)f(sparse)f(matrix)g +Fh(B)t(LC)6 b(K)g Fl(,)27 b(iden)n(tifying)h(the)f(\014rst)946 +2567 y(column)h(of)g(the)g(submatrix)f Fh(B)t(LC)6 b(K)g +Fl(.)946 2667 y(Scop)r(e:)37 b Fm(lo)s(cal)946 2766 y +Fl(T)n(yp)r(e:)g Fm(optional)946 2866 y Fl(Default:)h +Fh(j)5 b(bl)r(ck)25 b Fl(=)e(1.)946 2966 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(bl)r(ck)25 b Fg(\025)e Fl(1.)739 3153 y Fm(On)31 b(Return)739 +3340 y(a)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(global)g(sparse) +f(matrix)h Fh(A)h Fl(up)r(dated)g(with)g(matrix)f Fh(B)t(LC)6 +b(K)g Fl(.)946 3440 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3540 y Fl(T)n(yp)r(e:)g Fm(required)946 3639 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3826 y Fm(ierrv)42 b Fl(error)24 b(v)n(ector)i(used) +g(in)h(successiv)n(e)e(call)h(to)h(F90)p 2493 3826 V +29 w(PSSPINS)f(and)g(F90)p 3190 3826 V 30 w(PSSP)-7 b(ASB.)946 +3926 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4026 y Fl(T)n(yp)r(e:)g +Fm(required)946 4125 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +4428 y Fe(Usage)46 b(Notes)840 4620 y Fl(1.)41 b(Before)28 +b(call)h(this)g(subroutine)f(y)n(ou)g(m)n(ust)h(allo)r(cate)f +Fh(A)h Fl(with)g(F90)p 3033 4620 V 29 w(PSSP)-7 b(ALL)28 +b(sub-)946 4719 y(routine.)840 4907 y(2.)41 b Fh(I)7 +b(A;)14 b(J)8 b(A)38 b Fl(and)e(the)h(indices)g(in)g +Fh(B)t(LC)6 b(K)42 b Fl(are)36 b(written)g(in)h(terms)g(of)f(the)h +(\\global")946 5006 y(n)n(um)n(b)r(ering;)28 b(the)g(mapping)f(to)g(lo) +r(cal)g(storage)f(is)h(handled)h(in)n(ternally)-7 b(.)2130 +5255 y(40)p eop +%%Page: 41 41 +41 40 bop 392 523 a Fl(3.)41 b(The)i(\\insertion")d(in)j(the)g(routine) +f(name)g(means)g(that)h(co)r(e\016cien)n(ts)e(from)i(the)498 +623 y Fh(B)t(LC)6 b(K)46 b Fl(parameter)39 b(are)h(stored)g(in)n(to)g +(the)h(global)f(sparse)f(matrix)h Fh(A)h Fl(starting)498 +722 y(from)22 b Fh(A)p Fl(\()p Fh(I)7 b(A;)14 b(J)8 b(A)p +Fl(\),)25 b(i.e.,)e(elemen)n(t)g Fh(B)t(LC)6 b(K)g Fl(\(1)p +Fh(;)14 b Fl(1\))21 b(\(if)i(it)g(actually)e(exists\),)i(go)r(es)f(on)n +(to)498 822 y Fh(A)p Fl(\()p Fh(I)7 b(A;)14 b(J)8 b(A)p +Fl(\),)35 b(and)e(so)f(on.)51 b(This)32 b(do)r(es)g Ff(not)h +Fl(imply)g(that)f(previously)g(existing)g(co-)498 922 +y(e\016cien)n(ts)g(are)e(o)n(v)n(erwritten;)i(an)n(y)f(clashes)g(are)f +(resolv)n(ed)g(at)h(assem)n(bly)g(time)h(\(see)498 1021 +y(subroutine)27 b(F90)p 1052 1021 25 4 v 29 w(PSSP)-7 +b(ASB\).)392 1187 y(4.)41 b(This)34 b(routine)g(do)r(es)g(not)g(con)n +(tain)g(an)n(y)f(syncronization)g(p)r(oin)n(t;)38 b(th)n(us,)e(it)f +(can)f(b)r(e)498 1287 y(called)28 b(indep)r(enden)n(tly)i(b)n(y)e(eac)n +(h)g(of)h(the)g(participating)f(pro)r(cesses;)f(it)i(is)g(ho)n(w)n(ev)n +(er)498 1386 y(required)e(that)h(all)f(pro)r(cesses)f(execute)i(later)e +(a)i(single)f(call)g(to)g(F90)p 2666 1386 V 30 w(PSSP)-7 +b(ASB.)392 1553 y(5.)41 b(It)26 b(is)f(p)r(erfectly)g(legal)g(to)g +(call)g(this)h(routine)e(m)n(ultiple)i(times)g(with)g(the)f(same)g(v)-5 +b(alue)498 1652 y(of)36 b Fh(I)7 b(A)p Fl(;)41 b(this)36 +b(can)g(b)r(e)g(useful)g(e.g.)62 b(in)36 b(\014nite)h(elemen)n(t)f +(applications,)h(where)f(the)498 1752 y(most)c(natural)f(w)n(a)n(y)f +(to)i(de\014ne)g(the)g(matrix)f(is)h(to)g(lo)r(op)f(o)n(v)n(er)f(the)i +(elemen)n(ts,)h(and)498 1851 y(not)j(o)n(v)n(er)e(the)i(equations.)61 +b(As)36 b(an)f(extreme)h(example,)h(it)f(is)g(p)r(ossible)g(to)f(build) +498 1951 y(a)k(sparse)f(matrix)h(passing)f(just)i(one)f(nonzero)f(en)n +(try)h(to)g(eac)n(h)g(call.)72 b(Ho)n(w)n(ev)n(er)498 +2051 y(it)30 b(is)g(generally)e(con)n(v)n(enien)n(t)h(from)g(a)h(p)r +(erformance)f(p)r(oin)n(t)g(of)h(view)g(to)f(pac)n(k)g(data)498 +2150 y(spanning)18 b(m)n(ultiple)h(ro)n(ws)e(in)n(to)h(a)g(single)g +(call)g(to)g(this)h(subroutine;)i(the)e(optimal)f(size)498 +2250 y(of)k(eac)n(h)f Fh(B)t(LC)6 b(K)28 b Fl(of)22 b(data)f(dep)r +(ends)i(on)f(the)g(underlying)g(pro)r(cessor)d(con\014guration.)1681 +5255 y(41)p eop +%%Page: 42 42 +42 41 bop 739 745 a Fe(F90)p 966 745 41 4 v 48 w(PSSP)-11 +b(ASB|Assem)l(bly)44 b(Sparse)h(Matrix)739 999 y Fl(This)27 +b(subroutine)h(assem)n(blies)e(global)g(sparse)h(matrix)g +Fh(A)p Fl(.)p 1362 1121 1619 4 v 1412 1191 a Fh(A)924 +b Fm(Subroutine)p 1362 1224 V 1412 1293 a Fl(Long)26 +b(Precision)g(Real)258 b(F90)p 2541 1293 25 4 v 29 w(PSSP)-7 +b(ASB)1412 1393 y(Long)26 b(Precision)g(Complex)100 b(F90)p +2541 1393 V 29 w(PSSP)-7 b(ASB)p 1362 1426 1619 4 v 1775 +1658 a(T)g(able)27 b(3.3:)36 b(Data)27 b(t)n(yp)r(es)739 +2042 y Fe(Syn)l(tax)1345 2225 y Fl(CALL)h(F90)p 1742 +2225 25 4 v 29 w(PSSP)-7 b(ASB)27 b(\()p Ff(a,)k(ierrv,)g(de)l(c)l(omp) +p 2783 2225 26 4 v 32 w(data)p Fl(\))739 2424 y(CALL)d(F90)p +1136 2424 25 4 v 29 w(PSSP)-7 b(ASB)751 2524 y(\()p Ff(a,)31 +b(ierrv,)g(de)l(c)l(omp)p 1375 2524 26 4 v 32 w(data,)g(O)n +(VERLAP=overlap,)g(AFMT=afmt,UP=up,DUP=dup)p Fl(\))739 +2724 y Fm(On)g(En)m(try)739 2896 y(a)42 b Fl(the)28 b(lo)r(cal)f(p)r +(ortion)g(of)g(global)g(sparse)f(matrix)h Fh(A)h Fl(to)g(assem)n(bly)-7 +b(.)946 2996 y(Scop)r(e:)37 b Fm(lo)s(cal)946 3096 y +Fl(T)n(yp)r(e:)g Fm(required)946 3195 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3367 y Fm(decomp)p 1066 3367 29 4 +v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 3467 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3566 y Fl(T)n(yp)r(e:)g Fm(required)946 3666 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3838 y Fm(ierrv)42 b Fl(error)31 b(v)n(ector.)54 +b(It)33 b(m)n(ust)h(b)r(e)g(initialized)g(b)n(y)f(F90)p +2568 3838 25 4 v 29 w(PSSP)-7 b(ALL)33 b(and)g(m)n(ust)h(b)r(e)g(the) +946 3938 y(same)27 b(used)h(in)g(F90)p 1584 3938 V 29 +w(PSSPINS.)946 4037 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4137 y Fl(T)n(yp)r(e:)g Fm(required)946 4237 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +4408 y Fm(o)m(v)m(erlap)42 b Fl(P)n(ermits)27 b(to)h(impro)n(v)n(e)e +(F90)p 1952 4408 V 29 w(PSSP)-7 b(ASB)28 b(p)r(erformance,)e(if)j +(there)e(isn't)h(o)n(v)n(erlap)946 4508 y(b)r(et)n(w)n(een)37 +b(lo)r(cal)e(p)r(ortions)h(of)g(global)f(sparse)g(matrix)h +Fh(A)h Fl(assigned)e(o)n(v)n(er)f(all)i(pro-)946 4608 +y(cesses.)946 4707 y(Has)28 b(the)g(follo)n(wing)e(meaning:)946 +4807 y(If)j Fh(ov)s(er)r(l)r(ap)24 b Fl(=)f(0,)28 b(there)g(isn't)h(o)n +(v)n(erlap)d(b)r(et)n(w)n(een)i(lo)r(cal)f(p)r(ortions)h(of)g(global)f +(sparse)946 4907 y(matrix)h Fh(A)f Fl(assigned)g(o)n(v)n(er)f(all)h +(pro)r(cesses.)946 5006 y(If)36 b Fh(ov)s(er)r(l)r(ap)h +Fg(6)p Fl(=)f(0,)h(there)e(is)h(o)n(v)n(erlap)d(b)r(et)n(w)n(een)j(lo)r +(cal)f(p)r(ortions)g(of)g(global)g(sparse)2130 5255 y(42)p +eop +%%Page: 43 43 +43 42 bop 498 523 a Fl(matrix)27 b Fh(A)h Fl(assigned)f(o)n(v)n(er)e +(all)j(pro)r(cesses.)498 623 y(Scop)r(e:)37 b Fm(global)498 +722 y Fl(T)n(yp)r(e:)g Fm(optional)498 822 y Fl(Default:)h +Fh(ov)s(er)r(l)r(ap)23 b Fl(=)f(1.)498 922 y(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable.)291 1080 +y Fm(afm)m(t)41 b Fl(Request)29 b(matrix)g Fh(A)g Fl(to)g(b)r(e)h(con)n +(v)n(erted)e(in)n(to)h(a)g(sp)r(ec\014c)g(storage)e(format;)j(it)g(m)n +(ust)498 1180 y(b)r(e)36 b(supp)r(orted)f(b)n(y)g(the)h(underlying)f +(serial)f(sparse)g(BLAS.)i(It)g(is)f(assumed)g(that)498 +1279 y(at)d(least)f Fj(CSR)f Fl(should)i(b)r(e)g(supp)r(orted;)h(the)f +(default)g(is)g Fj(???)p Fl(,)f(whic)n(h)h(requests)f(the)498 +1379 y(default)d(of)g(the)g(underlying)f(serial)f(spares)g(BLAS.)i +(Scop)r(e:)37 b Fm(global)498 1478 y Fl(T)n(yp)r(e:)g +Fm(optional)498 1578 y Fl(Default:)h Fh(ov)s(er)r(l)r(ap)23 +b Fl(=)f Fj(???)o Fl(.)498 1678 y(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b Fj(CHARACTER*5)c Fl(v)-5 b(ariable.)291 1836 +y Fm(UP)42 b Fl(Request)23 b(matrix)g Fh(A)h Fl(to)f(b)r(e)h(prepared)e +(for)h(e\016cien)n(t)g(pattern)g(reuse)g(\(see)g(subroutine)498 +1936 y(F90)p 641 1936 25 4 v 29 w(PSSPUPD\).)g(This)g(en)n(tails)g +(additional)f(storage)g(o)n(v)n(erhead.)33 b(P)n(ossible)21 +b(v)-5 b(alues)498 2035 y(are)27 b Fj(Y)g Fl(and)g Fj(N)p +Fl(.)h(Scop)r(e:)37 b Fm(global)498 2135 y Fl(T)n(yp)r(e:)g +Fm(optional)498 2235 y Fl(Default:)h Fh(U)9 b(P)34 b +Fl(=)23 b Fj(N)p Fl(.)498 2334 y(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b Fj(CHARACTER)d Fl(v)-5 b(ariable.)291 2493 y +Fm(DUP)42 b Fl(Sp)r(eci\014es)30 b(ho)n(w)g(to)f(treat)h(duplicated)g +(matrix)g(en)n(tries;)g(see)g(usage)f(notes)g(b)r(elo)n(w.)498 +2592 y(Scop)r(e:)37 b Fm(global)498 2692 y Fl(T)n(yp)r(e:)g +Fm(optional)498 2792 y Fl(Default:)h Fh(dup)22 b Fl(=)h(1.)498 +2891 y(Sp)r(eci\014ed)28 b(as:)37 b(an)27 b(in)n(teger)g(v)-5 +b(ariable.)291 3050 y Fm(On)31 b(Return)291 3208 y(a)41 +b Fl(the)28 b(lo)r(cal)g(p)r(ortion)f(of)g(global)g(sparse)f(matrix)h +Fh(A)h Fl(assem)n(bled.)498 3308 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3407 y Fl(T)n(yp)r(e:)g Fm(required)498 3507 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 3778 y Fe(Usage)46 b(Notes)392 3960 +y Fl(1.)41 b(Before)28 b(call)g(this)h(subroutine)g(y)n(ou)f(m)n(ust)h +(allo)r(cate)e Fh(A)j Fl(with)f(F90)p 2585 3960 V 29 +w(PSSP)-7 b(ALL)28 b(sub-)498 4060 y(routine)f(and)h(insert)f(all)g(ro) +n(ws)f(of)i(matrix)f Fh(A)h Fl(with)g(F90)p 2271 4060 +V 29 w(PSSPINS.)392 4218 y(2.)41 b(It)35 b(ma)n(y)f(happ)r(en)h(that)g +(calls)f(to)g Fj(F90_PSSPINS)c Fl(ha)n(v)n(e)k(inserted)g(m)n(ultiple)h +(v)-5 b(alues)498 4318 y(with)36 b(the)g(same)f(indices,)i(i.e.)60 +b(y)n(ou)35 b(ma)n(y)f(ha)n(v)n(e)h(duplicated)g(co)r(e\016cien)n(t)h +(en)n(tries.)498 4418 y(This)29 b(subroutine)f(pro)n(vides)f(three)h(w) +n(a)n(ys)f(of)h(coping)g(with)h(duplicated)f(en)n(tries)g(b)n(y)498 +4517 y(sp)r(eci\014yng)f(the)h(relev)-5 b(an)n(t)27 b(v)-5 +b(alue)28 b(in)n(to)f(the)h(DUP)g(v)-5 b(ariable:)598 +4676 y(1)41 b(Raise)27 b(an)g(error,)f(duplicated)i(co)r(e\016cien)n +(ts)f(should)g(not)h(exist;)598 4801 y(2)41 b(Ignore)26 +b(replications,)h(k)n(eep)g(one)g(of)g(the)h(co)r(e\016cien)n(ts)g +(found;)598 4926 y(3)41 b(Sum)28 b(the)g(duplicated)g(en)n(tries)f(to)g +(obtain)g(the)h(\014nal)g(co)r(e\016cien)n(t.)1681 5255 +y(43)p eop +%%Page: 44 44 +44 43 bop 739 719 a Fe(F90)p 966 719 41 4 v 48 w(PSPTINS|Insert)45 +b(P)l(attern)739 947 y Fl(This)29 b(subroutine)f(handles)h(up)r(dates)g +(to)g(a)f(comm)n(unication)g(descriptor)g(based)g(only)h(on)739 +1046 y(the)k(sparsit)n(y)f(pattern.)52 b(This)33 b(is)f(useful)i(in)f +(those)f(cases)g(where)g(w)n(e)h(are)e(only)i(actually)739 +1146 y(using)27 b(a)g(connectivit)n(y)g(graph.)p 1676 +1255 990 4 v 1726 1325 a Fh(B)t(LC)6 b(K)105 b Fm(Subroutine)p +1676 1358 V 2091 1427 a Fl(F90)p 2234 1427 25 4 v 29 +w(PSPTINS)p 1676 1461 990 4 v 1775 1693 a(T)-7 b(able)27 +b(3.4:)36 b(Data)27 b(t)n(yp)r(es)739 2052 y Fe(Syn)l(tax)1179 +2234 y Fl(CALL)h(F90)p 1576 2234 25 4 v 29 w(PSPTINS)f(\()p +Ff(ia,)32 b(ja,)f(blck,)g(ierrv,)g(de)l(c)l(omp)p 2949 +2234 26 4 v 31 w(data)p Fl(\))739 2433 y Fm(On)g(En)m(try)739 +2589 y(ia)41 b Fl(the)29 b(ro)n(w)d(index)i(of)g(global)e(sparse)h +(matrix)g(pattern)h Fh(A)p Fl(,)g(iden)n(tifying)g(the)g(\014rst)g(ro)n +(w)e(of)946 2689 y(the)i(submatrix)f(pattern)h Fh(A)p +Fl(.)946 2789 y(Scop)r(e:)37 b Fm(global)946 2888 y Fl(T)n(yp)r(e:)g +Fm(required)946 2988 y Fl(Sp)r(eci\014ed)20 b(as:)32 +b(an)19 b(in)n(teger)g(v)-5 b(ariable)18 b(1)23 b Fg(\024)f +Fh(ia)h Fg(\024)f Fh(decomp)p 2706 2988 25 4 v 30 w(data)p +Fl(\045)p Fh(matr)r(ix)p 3228 2988 V 31 w(data)p Fl([)p +Fh(M)p 3532 2988 V 38 w Fl(].)739 3144 y Fm(ja)42 b Fl(the)27 +b(column)f(index)g(of)h(the)f(global)f(sparse)g(matrix)h +Fh(A)p Fl(,)h(iden)n(tifying)g(the)f(\014rst)g(column)946 +3243 y(of)i(the)g(submatrix)f Fh(A)p Fl(.)946 3343 y(Scop)r(e:)37 +b Fm(global)946 3443 y Fl(T)n(yp)r(e:)g Fm(required)946 +3542 y Fl(Sp)r(eci\014ed)21 b(as:)33 b(an)19 b(in)n(teger)g(v)-5 +b(ariable)19 b(1)k Fg(\024)g Fh(j)5 b(a)23 b Fg(\024)f +Fh(decomp)p 2720 3542 V 30 w(data)p Fl(\045)p Fh(matr)r(ix)p +3242 3542 V 30 w(data)p Fl([)p Fh(N)p 3532 3542 V 39 +w Fl(].)739 3698 y Fm(blc)m(k)42 b Fl(the)28 b(lo)r(cal)f(submatrix)g +(pattern)h(to)f(insert.)946 3798 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3897 y Fl(T)n(yp)r(e:)g Fm(required)946 3997 y Fl(Sp)r(eci\014ed)f(as:) +50 b(a)35 b(structured)f(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)g Fl(1;)j(supp)r(orted)c(storage)946 4097 y(formats)27 +b(are)g Fj(CSR)f Fl(and)i Fj(COO)p Fl(..)739 4253 y Fm(decomp)p +1066 4253 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 4352 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4452 y Fl(T)n(yp)r(e:)g Fm(required)946 4551 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4707 y Fm(ierrv)42 b Fl(error)25 b(v)n(ector.)36 +b(It)28 b(m)n(ust)g(b)r(e)g(initialized)f(b)n(y)h(F90)p +2515 4707 25 4 v 29 w(PSSP)-7 b(ALL.)946 4807 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 4907 y Fl(T)n(yp)r(e:)g Fm(required)946 +5006 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(one)g(dimensional)g(arra) +n(y)f(of)h(in)n(teger.)2130 5255 y(44)p eop +%%Page: 45 45 +45 44 bop 291 523 a Fm(On)31 b(Return)291 689 y(ierrv)41 +b Fl(error)25 b(v)n(ector)g(used)i(in)g(successiv)n(e)e(call)h(to)g +(F90)p 2044 689 25 4 v 29 w(PSSPINS)h(and)f(F90)p 2742 +689 V 29 w(PSSP)-7 b(ASB.)498 789 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +888 y Fl(T)n(yp)r(e:)g Fm(required)498 988 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(one)g(dimensional)g(arra)n(y)e(of)j(in)n(teger.)291 +1263 y Fe(Usage)46 b(Notes)392 1444 y Fl(1.)41 b(Before)f(call)g(this)h +(subroutine)g(y)n(ou)f(m)n(ust)h(allo)r(cate)f Fh(D)r(E)5 +b(C)h(O)r(M)j(P)p 2672 1444 V 42 w(D)r(AT)j(A)40 b Fl(with)498 +1544 y(F90)p 641 1544 V 29 w(PSDSCALL)28 b(subroutine.)392 +1710 y(2.)41 b Fh(I)7 b(A;)14 b(J)8 b(A)37 b Fl(and)g(the)g(indices)g +(in)f Fh(B)t(LC)6 b(K)43 b Fl(are)35 b(written)i(in)g(terms)f(of)h(the) +g(\\global")498 1810 y(n)n(um)n(b)r(ering;)27 b(the)h(mapping)g(to)f +(lo)r(cal)g(storage)f(is)h(handled)h(in)n(ternally)-7 +b(.)392 1976 y(3.)41 b(This)26 b(routine)f(is)h(v)n(ery)f(similar)g(to) +g(F90)p 1746 1976 V 29 w(PSSPINS,)h(but)g(the)h(co)r(e\016cien)n(t)e(v) +-5 b(alues)26 b(in)498 2075 y(the)31 b Fh(B)t(LC)6 b(K)35 +b Fl(parameter)29 b(are)g(completely)h(ignored;)g(only)g(the)g(nonzero) +f(pattern,)498 2175 y(i.e.)37 b(the)28 b(ro)n(w)e(and)i(column)f +(indices,)h(are)f(used)g(to)h(up)r(date)g(DECOMP)p 2783 +2175 V 29 w(D)n(A)-7 b(T)g(A.)392 2341 y(4.)41 b(This)34 +b(routine)g(do)r(es)g(not)g(con)n(tain)g(an)n(y)f(syncronization)g(p)r +(oin)n(t;)38 b(th)n(us,)e(it)f(can)f(b)r(e)498 2441 y(called)28 +b(indep)r(enden)n(tly)i(b)n(y)e(eac)n(h)g(of)h(the)g(participating)f +(pro)r(cesses;)f(it)i(is)g(ho)n(w)n(ev)n(er)498 2540 +y(required)e(that)h(all)f(pro)r(cesses)f(execute)i(later)e(a)i(single)f +(call)g(to)g(F90)p 2666 2540 V 30 w(PSPT)-7 b(ASB.)392 +2706 y(5.)41 b(It)26 b(is)f(p)r(erfectly)g(legal)g(to)g(call)g(this)h +(routine)e(m)n(ultiple)i(times)g(with)g(the)f(same)g(v)-5 +b(alue)498 2806 y(of)21 b Fh(I)7 b(A)p Fl(;)23 b(this)f(can)e(b)r(e)h +(useful)h(e.g.)34 b(in)21 b(\014nite)g(elemen)n(t)g(applications,)h +(where)e(the)h(most)498 2906 y(natural)h(w)n(a)n(y)g(to)g(de\014ne)h +(the)h(matrix)e(is)g(to)h(lo)r(op)f(o)n(v)n(er)f(the)j(elemen)n(ts,)f +(and)g(not)g(o)n(v)n(er)498 3005 y(the)40 b(equations.)72 +b(Ho)n(w)n(ev)n(er)38 b(it)i(is)f(generally)f(con)n(v)n(enien)n(t)h +(from)g(a)g(p)r(erformance)498 3105 y(p)r(oin)n(t)d(of)g(view)g(to)f +(pac)n(k)g(data)g(spanning)h(m)n(ultiple)g(ro)n(ws)e(in)n(to)i(a)f +(single)h(call)f(to)498 3204 y(this)d(subroutine;)h(the)e(optimal)g +(size)g(of)h(eac)n(h)e Fh(B)t(LC)6 b(K)37 b Fl(of)31 +b(data)g(dep)r(ends)h(on)f(the)498 3304 y(underlying)c(pro)r(cessor)f +(con\014guration.)1681 5255 y(45)p eop +%%Page: 46 46 +46 45 bop 739 739 a Fe(F90)p 966 739 41 4 v 48 w(PSPT)-11 +b(ASB|Assem)l(bly)44 b(Sparse)739 888 y(Comm)l(unication)i(P)l(attern) +739 1137 y Fl(This)27 b(subroutine)h(assem)n(blies)e(a)h(comm)n +(unication)g(pattern)g(DECOMP)p 3066 1137 25 4 v 29 w(D)n(A)-7 +b(T)g(A.)p 1798 1256 746 4 v 1948 1325 a Fm(Subroutine)p +1798 1359 V 1948 1428 a Fl(F90)p 2091 1428 25 4 v 29 +w(PSPT)g(ASB)p 1798 1462 746 4 v 1775 1694 a(T)g(able)27 +b(3.5:)36 b(Data)27 b(t)n(yp)r(es)739 2068 y Fe(Syn)l(tax)1387 +2250 y Fl(CALL)h(F90)p 1784 2250 25 4 v 29 w(PSPT)-7 +b(ASB)27 b(\()p Ff(ierrv,)k(de)l(c)l(omp)p 2741 2250 +26 4 v 32 w(data)p Fl(\))739 2449 y Fm(On)g(En)m(try)739 +2615 y(decomp)p 1066 2615 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 2715 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 2814 y Fl(T)n(yp)r(e:)g +Fm(required)946 2914 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 3080 y Fm(ierrv)42 b Fl(error)31 b(v)n(ector.)54 +b(It)33 b(m)n(ust)h(b)r(e)g(initialized)g(b)n(y)f(F90)p +2568 3080 25 4 v 29 w(PSSP)-7 b(ALL)33 b(and)g(m)n(ust)h(b)r(e)g(the) +946 3179 y(same)27 b(used)h(in)g(F90)p 1584 3179 V 29 +w(PSSPINS.)946 3279 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3379 y Fl(T)n(yp)r(e:)g Fm(required)946 3478 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +3644 y Fm(On)k(Return)739 3810 y(decomp)p 1066 3810 29 +4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 3910 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4010 y Fl(T)n(yp)r(e:)g Fm(required)946 4109 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4384 y Fe(Usage)46 b(Notes)840 4566 +y Fl(1.)41 b(Before)18 b(call)g(this)h(subroutine)f(y)n(ou)f(m)n(ust)i +(allo)r(cate)e(DECOMP)p 2919 4566 25 4 v 29 w(D)n(A)-7 +b(T)g(A)19 b(with)g(F90)p 3516 4566 V 29 w(PSDSCALL)946 +4665 y(subroutine)28 b(and)f(insert)g(all)h(relev)-5 +b(an)n(t)27 b(pattern)g(ro)n(ws)f(with)i(F90)p 2999 4665 +V 29 w(PSPTINS.)2130 5255 y(46)p eop +%%Page: 47 47 +47 46 bop 291 726 a Fe(F90)p 518 726 41 4 v 48 w(PSSPFREE|F)-11 +b(ree)45 b(Sparse)g(Matrix)291 962 y Fl(This)27 b(subroutine)g(deallo)r +(cates)g(sparse)f(matrix's)h(structures.)p 880 1075 1687 +4 v 929 1145 a Fh(A)924 b Fm(Subroutine)p 880 1178 V +929 1248 a Fl(Long)27 b(Precision)f(Real)257 b(F90)p +2058 1248 25 4 v 29 w(PSSPFREE)929 1347 y(Long)27 b(Precision)f +(Complex)99 b(F90)p 2058 1347 V 29 w(PSSPFREE)p 880 1380 +1687 4 v 1326 1612 a(T)-7 b(able)28 b(3.6:)36 b(Data)27 +b(t)n(yp)r(es)291 1978 y Fe(Syn)l(tax)962 2160 y Fl(CALL)g(F90)p +1358 2160 25 4 v 29 w(PSSPFREE)g(\()j Ff(a,)g(de)l(c)l(omp)p +2270 2160 26 4 v 32 w(data)p Fl(\))291 2359 y Fm(On)h(En)m(try)291 +2519 y(a)41 b Fl(the)28 b(lo)r(cal)g(p)r(ortion)f(of)g(global)g(sparse) +f(matrix)h Fh(A)h Fl(to)f(deallo)r(cate.)498 2618 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 2718 y Fl(T)n(yp)r(e:)g Fm(required)498 +2817 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)291 2977 +y Fm(decomp)p 618 2977 29 4 v 33 w(data)42 b Fl(con)n(tains)28 +b(data)h(structures)g(for)g(comm)n(unications)f(referred)g(to)h(sparse) +498 3077 y(matrix)e(to)h(deallo)r(cate.)498 3177 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 3276 y Fl(T)n(yp)r(e:)g Fm(required)498 +3376 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)291 3536 +y Fm(On)k(Return)291 3695 y(a)41 b Fl(deallo)r(cated)27 +b(structure)g(for)h(storing)e(other)h(sparse)f(matrices.)498 +3795 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3895 y Fl(T)n(yp)r(e:)g +Fm(required)498 3994 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 4154 y Fm(decomp)p 618 4154 V 33 w(data)42 +b Fl(con)n(tains)27 b(deallo)r(cated)g(data)g(structures)f(for)i(comm)n +(unications.)498 4254 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4353 y Fl(T)n(yp)r(e:)g Fm(required)498 4453 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 4725 y Fe(Usage)46 b(Notes)392 4907 +y Fl(1.)41 b(Before)28 b(call)g(this)h(subroutine)g(y)n(ou)f(m)n(ust)h +(allo)r(cate)e Fh(A)j Fl(with)f(F90)p 2585 4907 25 4 +v 29 w(PSSP)-7 b(ALL)28 b(sub-)498 5006 y(routine.)1681 +5255 y(47)p eop +%%Page: 48 48 +48 47 bop 840 523 a Fl(2.)41 b(After)23 b(this)g(call,)g(if)g(no)g +(error)d(encoun)n(tered,)j(data)f(structures)g Fh(a)g +Fl(and)h Fh(decomp)p 3418 523 25 4 v 29 w(data)946 623 +y Fl(can)28 b(b)r(e)g(used)f(for)g(storing)g(other)g(sparse)f +(matrices.)2130 5255 y(48)p eop +%%Page: 49 49 +49 48 bop 291 739 a Fe(F90)p 518 739 41 4 v 48 w(PSDSCFREE|F)-11 +b(ree)45 b(Comm)l(unication)291 888 y(Descriptor)291 +1137 y Fl(This)27 b(subroutine)g(deallo)r(cates)g(comm)n(unication)g +(descriptors)291 1411 y Fe(Syn)l(tax)978 1593 y Fl(CALL)h(F90)p +1375 1593 25 4 v 29 w(PSDSCFREE)54 b(\()p Ff(de)l(c)l(omp)p +2253 1593 26 4 v 32 w(data)p Fl(\))291 1792 y Fm(On)31 +b(En)m(try)291 1958 y(decomp)p 618 1958 29 4 v 33 w(data)42 +b Fl(con)n(tains)28 b(data)h(structures)g(for)g(comm)n(unications)f +(referred)g(to)h(sparse)498 2058 y(matrix)e(to)h(deallo)r(cate.)498 +2158 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2257 y Fl(T)n(yp)r(e:)g +Fm(required)498 2357 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 2523 y Fm(On)k(Return)291 2689 y(decomp)p +618 2689 V 33 w(data)42 b Fl(It)28 b(is)f(deallo)r(cated.)498 +2789 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2888 y Fl(T)n(yp)r(e:)g +Fm(required)498 2988 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 3262 y Fe(Usage)46 b(Notes)392 3444 +y Fl(1.)41 b(Before)36 b(a)g(call)g(this)h(subroutine)f(y)n(ou)g(m)n +(ust)g(ha)n(v)n(e)g(allo)r(cate)f Fh(decomp)p 2771 3444 +25 4 v 30 w(data)h Fl(with)498 3544 y(F90)p 641 3544 +V 29 w(PSDSCALL)28 b(subroutine.)392 3710 y(2.)41 b(After)34 +b(this)f(call,)i(if)f(no)f(error)e(encoun)n(tered,)j(data)f(structure)f +Fh(decomp)p 2811 3710 V 30 w(data)h Fl(can)498 3809 y(b)r(e)28 +b(reused.)1681 5255 y(49)p eop +%%Page: 50 50 +50 49 bop 739 732 a Fe(F90)p 966 732 41 4 v 48 w(PSDSALL|Allo)t(cate)45 +b(Global)h(Dense)f(Matrix)739 972 y Fl(This)27 b(subroutine)h(allo)r +(cates)e(global)h(dense)g(matrix)g Fh(X)7 b Fl(.)p 1356 +1088 1631 4 v 1405 1158 a Fh(X)917 b Fm(Subroutine)p +1356 1191 V 1405 1260 a Fl(In)n(teger)731 b(F90)p 2534 +1260 25 4 v 29 w(PSDSALL)1405 1360 y(Long)27 b(Precision)f(Real)257 +b(F90)p 2534 1360 V 29 w(PSDSALL)1405 1460 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2534 1460 V 29 w(PSDSALL)p +1356 1493 1631 4 v 1775 1725 a(T)-7 b(able)27 b(3.7:)36 +b(Data)27 b(t)n(yp)r(es)739 2094 y Fe(Syn)l(tax)1171 +2276 y Fl(CALL)h(F90)p 1568 2276 25 4 v 29 w(PSDSALL)g(\()p +Ff(m,)i(n,)g(x,)g(ierrv,)h(de)l(c)l(omp)p 2843 2276 26 +4 v 31 w(data,)g(js)p Fl(\))1279 2475 y(CALL)d(F90)p +1676 2475 25 4 v 29 w(PSDSALL)g(\()p Ff(m,)i(x,)g(ierrv,)h(de)l(c)l +(omp)p 2849 2475 26 4 v 32 w(data)p Fl(\))739 2674 y +Fm(On)g(En)m(try)739 2837 y(m)40 b Fl(n)n(um)n(b)r(er)27 +b(of)h(ro)n(ws)e(of)i(global)e(dense)h(submatrix)g Fh(X)34 +b Fl(to)28 b(allo)r(cate.)946 2936 y(Scop)r(e:)37 b Fm(global)946 +3036 y Fl(T)n(yp)r(e:)g Fm(required)946 3135 y Fl(Sp)r(eci\014ed)23 +b(as:)34 b(an)22 b(in)n(teger)f(v)-5 b(ariable)21 b(0)i +Fg(\024)g Fh(m)g Fg(\024)f Fh(decomp)p 2720 3135 25 4 +v 30 w(data)p Fl(\045)p Fh(matr)r(ix)p 3242 3135 V 30 +w(data)p Fl([)p Fh(N)p 3532 3135 V 39 w Fl(].)739 3298 +y Fm(n)41 b Fl(n)n(um)n(b)r(er)28 b(of)f(columns)h(of)f(global)g(dense) +g(submatrix)g Fh(X)34 b Fl(to)28 b(allo)r(cate.)946 3397 +y(Scop)r(e:)37 b Fm(global)946 3497 y Fl(T)n(yp)r(e:)g +Fm(required)p Fl(,)28 b(but)g(can)g(only)f(b)r(e)h(presen)n(t)f(if)h +Fh(x)g Fl(is)f(of)h(rank)f(2.)946 3597 y(Sp)r(eci\014ed)i(as:)36 +b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(n)d Fg(\025)g +Fl(0.)739 3759 y Fm(x)42 b Fl(a)27 b(rank)g(one)g(or)g(t)n(w)n(o)f +(arra)n(y)g(with)i(the)g(POINTER)e(attribute.)739 3921 +y Fm(ierrv)42 b Fl(see)27 b(\\On)g(Return".)739 4084 +y Fm(decomp)p 1066 4084 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 4183 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 4283 y Fl(T)n(yp)r(e:)g +Fm(required)946 4383 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 4545 y Fm(js)41 b Fl(starting)27 b(index)h(for)f(the)h +(columns)f(of)h(global)e(dense)i(submatrix)f Fh(X)34 +b Fl(to)27 b(allo)r(cate.)946 4645 y(Scop)r(e:)37 b Fm(global)946 +4744 y Fl(T)n(yp)r(e:)g Fm(optional)p Fl(;)27 b(can)g(only)g(b)r(e)h +(presen)n(t)f(if)i Fh(x)f Fl(is)f(of)h(rank)e(2.)946 +4844 y(Sp)r(eci\014ed)j(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable;)27 b(default:)37 b Fh(j)5 b(s)23 b Fl(=)f(1.)739 +5006 y Fm(On)31 b(Return)2130 5255 y Fl(50)p eop +%%Page: 51 51 +51 50 bop 291 523 a Fm(x)41 b Fl(allo)r(cated)27 b(lo)r(cal)g(p)r +(ortion)g(of)h(global)e(dense)i(matrix)f Fh(X)7 b Fl(.)498 +623 y(Scop)r(e:)37 b Fm(lo)s(cal)498 722 y Fl(T)n(yp)r(e:)g +Fm(required)498 822 y Fl(Sp)r(eci\014ed)k(as:)61 b(a)40 +b(rank-one)e(or)h(t)n(w)n(o)h(arra)n(y)d(con)n(taining)j(n)n(um)n(b)r +(ers)f(of)h(the)h(t)n(yp)r(e)498 922 y(indicated)35 b(in)f(T)-7 +b(able)34 b(3.7,)i(of)e(size)g Fh(decomp)p 1912 922 25 +4 v 30 w(data)p Fl(\045)p Fh(matr)r(ix)p 2434 922 V 30 +w(data)p Fl([)p Fh(N)p 2724 922 V 39 w Fl(])h(b)n(y)f +Fh(n)p Fl(;)k(the)498 1021 y(column)28 b(indices)f(are)g(declared)g(as) +g Fh(j)5 b(s)22 b Fl(:)i Fh(j)5 b(s)18 b Fl(+)g Fh(n)g +Fg(\000)g Fl(1.)291 1187 y Fm(ierrv)41 b Fl(error)26 +b(v)n(ector)g(used)i(in)g(subroutines)f(F90)p 1858 1187 +V 29 w(PSDSINS)h(and)f(F90)p 2564 1187 V 30 w(PSDSASB.)498 +1287 y(Scop)r(e:)37 b Fm(lo)s(cal)498 1386 y Fl(T)n(yp)r(e:)g +Fm(required)498 1486 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(one)g(dimensional)g(arra)n(y)e(of)j(in)n(teger.)291 +1761 y Fe(Usage)46 b(Notes)392 1942 y Fl(1.)41 b(Before)36 +b(call)g(This)g(subroutine)g(y)n(ou)g(m)n(ust)g(initialize)h +Fh(decomp)p 2550 1942 V 29 w(data)g Fl(\014elds)f(with)498 +2042 y(F90)p 641 2042 V 29 w(PSSP)-7 b(ASB)27 b(subroutine.)1681 +5255 y(51)p eop +%%Page: 52 52 +52 51 bop 739 754 a Fe(F90)p 966 754 41 4 v 48 w(PSDSINS|Insert)46 +b(Dense)f(Matrix)739 1018 y Fl(This)27 b(subroutine)f(inserts)h(a)f(lo) +r(cal)g(dense)h(matrix)g Fh(B)t(LC)6 b(K)32 b Fl(in)27 +b(the)g(global)f(dense)h(matrix)739 1117 y Fh(X)7 b Fl(.)p +1369 1228 1604 4 v 1419 1298 a Fh(X)r(;)14 b(B)t(LC)6 +b(K)618 b Fm(Subroutine)p 1369 1331 V 1419 1401 a Fl(In)n(teger)731 +b(F90)p 2548 1401 25 4 v 29 w(PSDSINS)1419 1500 y(Long)27 +b(Precision)f(Real)257 b(F90)p 2548 1500 V 29 w(PSDSINS)1419 +1600 y(Long)27 b(Precision)f(Complex)99 b(F90)p 2548 +1600 V 29 w(PSDSINS)p 1369 1633 1604 4 v 1775 1865 a(T)-7 +b(able)27 b(3.8:)36 b(Data)27 b(t)n(yp)r(es)739 2280 +y Fe(Syn)l(tax)1028 2465 y Fl(CALL)g(F90)p 1424 2465 +25 4 v 29 w(PSDSINS)i(\()p Ff(m,)h(n,)g(x,)f(ix,)i(jx,)f(blck,)h +(ierrv,)g(de)l(c)l(omp)p 3101 2465 26 4 v 31 w(data)p +Fl(\))739 2664 y(CALL)d(F90)p 1136 2664 25 4 v 29 w(PSDSINS)1037 +2764 y(\()p Ff(m,)j(n,)f(x,)f(ix,)h(jx,)h(blck,)g(ierrv,)g(de)l(c)l +(omp)p 2312 2764 26 4 v 31 w(data)g(IBLCK=iblck,)h(JBLCK=jblck)p +Fl(\))1138 2963 y(CALL)c(F90)p 1535 2963 25 4 v 29 w(PSDSINS)g(\()p +Ff(m,)i(x,)g(ix,)g(blck,)h(ierrv,)g(de)l(c)l(omp)p 2990 +2963 26 4 v 32 w(data)p Fl(\))739 3166 y Fm(On)g(En)m(try)739 +3348 y(m)40 b Fl(n)n(um)n(b)r(er)27 b(of)h(ro)n(ws)e(of)i(lo)r(cal)f +(dense)g(submatrix)g Fh(B)t(LC)6 b(K)33 b Fl(to)28 b(insert.)946 +3448 y(Scop)r(e:)37 b Fm(global)946 3547 y Fl(T)n(yp)r(e:)g +Fm(required)946 3647 y Fl(Sp)r(eci\014ed)20 b(as:)32 +b(an)19 b(in)n(teger)f(v)-5 b(ariable)19 b(0)j Fg(\024)h +Fh(m)g Fg(\024)f Fh(decomp)p 2706 3647 25 4 v 30 w(data)p +Fl(\045)p Fh(matr)r(ix)p 3228 3647 V 31 w(data)p Fl([)p +Fh(M)p 3532 3647 V 38 w Fl(].)739 3828 y Fm(n)41 b Fl(n)n(um)n(b)r(er) +28 b(of)f(columns)h(of)f(global)g(dense)g(submatrix)g +Fh(B)t(LC)6 b(K)33 b Fl(to)28 b(insert.)946 3928 y(Scop)r(e:)37 +b Fm(global)946 4028 y Fl(T)n(yp)r(e:)g Fm(required)28 +b Fl(but)g(only)f(if)i Fh(x)f Fl(is)f(of)h(rank)e(2.)946 +4127 y(Sp)r(eci\014ed)j(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b(.)739 4309 y Fm(x)42 b Fl(the)34 b(lo)r(cal)g(p)r +(ortion)f(of)h(global)f(dense)h(matrix)g Fh(X)7 b Fl(.)55 +b(This)34 b(subroutine)g(computes)g(the)946 4408 y(lo)r(cation)23 +b(of)h(the)g(\014rst)f(elemen)n(t)h(of)f(the)h(lo)r(cal)f(subarra)n(y)e +(to)i(insert,)i(based)e(on)g Fh(ix;)14 b(j)5 b(x)946 +4508 y Fl(and)28 b Fh(matr)r(ix)p 1375 4508 V 30 w(data)g +Fl(\014eld)g(of)f Fh(decomp)p 2141 4508 V 30 w(data)p +Fl(.)946 4608 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4707 y +Fl(T)n(yp)r(e:)g Fm(required)946 4807 y Fl(Sp)r(eci\014ed)31 +b(as:)41 b(a)30 b(rank)f(one)h(or)f(t)n(w)n(o)g(arra)n(y)f(with)i(the)h +(POINTER)e(attribute)h(con-)946 4907 y(taining)24 b(n)n(um)n(b)r(ers)g +(of)g(the)h(t)n(yp)r(e)f(indicated)h(in)f(T)-7 b(able)24 +b(3.8.)35 b(The)25 b(rank)e(of)h Fh(bl)r(ck)i Fl(m)n(ust)946 +5006 y(b)r(e)i(the)g(same)f(of)h Fh(x)p Fl(.)2130 5255 +y(52)p eop +%%Page: 53 53 +53 52 bop 291 523 a Fm(ix)41 b Fl(the)f(ro)n(w)f(index)h(of)f(global)g +(dense)h(matrix)f Fh(X)7 b Fl(,)42 b(iden)n(tifying)e(the)h(\014rst)e +(ro)n(w)g(of)h(the)498 623 y(submatrix)27 b Fh(X)7 b +Fl(.)498 722 y(Scop)r(e:)37 b Fm(global)498 822 y Fl(T)n(yp)r(e:)g +Fm(required)498 922 y Fl(Sp)r(eci\014ed)22 b(as:)34 b(an)21 +b(in)n(teger)f(v)-5 b(ariable)21 b(1)h Fg(\024)h Fh(ix)g +Fg(\024)g Fh(decomp)p 2272 922 25 4 v 29 w(data)p Fl(\045)p +Fh(matr)r(ix)p 2793 922 V 31 w(data)p Fl([)p Fh(N)p 3084 +922 V 39 w Fl(].)291 1083 y Fm(jx)42 b Fl(the)27 b(column)g(index)g(of) +g(the)g(global)e(dense)i(matrix)g Fh(X)7 b Fl(,)26 b(iden)n(tifying)h +(the)g(\014rst)g(column)498 1183 y(of)h(the)g(submatrix)f +Fh(X)7 b Fl(.)498 1283 y(Scop)r(e:)37 b Fm(global)498 +1382 y Fl(T)n(yp)r(e:)g Fm(required)498 1482 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(x)24 b Fg(\025)e Fl(1.)291 1644 y Fm(blc)m(k)42 b Fl(the)28 +b(lo)r(cal)f(submatrix)g(to)g(insert.)498 1743 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 1843 y Fl(T)n(yp)r(e:)g Fm(required)498 +1943 y Fl(Sp)r(eci\014ed)42 b(as:)63 b(a)41 b(rank)f(one)h(or)f(rank)g +(t)n(w)n(o)g(con)n(taining)h(n)n(um)n(b)r(ers)f(of)h(the)h(t)n(yp)r(e) +498 2042 y(indicated)28 b(in)g(T)-7 b(able)27 b(3.8.)36 +b(The)28 b(rank)e(of)i Fh(bl)r(ck)h Fl(m)n(ust)f(b)r(e)g(the)g(same)f +(of)h Fh(x)p Fl(.)291 2204 y Fm(decomp)p 618 2204 29 +4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)498 2304 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +2403 y Fl(T)n(yp)r(e:)g Fm(required)498 2503 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 2665 y Fm(ierrv)41 b Fl(error)26 b(v)n(ector.)36 +b(It)28 b(m)n(ust)f(b)r(e)h(initialized)g(b)n(y)f(F90)p +2066 2665 25 4 v 30 w(PSDSALL.)498 2765 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 2864 y Fl(T)n(yp)r(e:)g Fm(required)498 +2964 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(one)g(dimensional)g(arra) +n(y)e(of)j(in)n(teger.)291 3126 y Fm(iblc)m(k)41 b Fl(the)32 +b(ro)n(w)e(index)h(of)g(lo)r(cal)g(dense)g(matrix)g Fh(B)t(LC)6 +b(K)g Fl(,)31 b(iden)n(tifying)h(the)f(\014rst)g(ro)n(w)f(of)498 +3225 y(the)e(submatrix)f Fh(B)t(LC)6 b(K)g Fl(.)498 3325 +y(Scop)r(e:)37 b Fm(lo)s(cal)498 3425 y Fl(T)n(yp)r(e:)g +Fm(optional)498 3524 y Fl(Default:)h Fh(ibl)r(ck)24 b +Fl(=)f(1.)498 3624 y(Sp)r(eci\014ed)28 b(as:)37 b(an)27 +b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 b(bl)r(ck)25 +b Fg(\025)e Fl(1.)291 3786 y Fm(jblc)m(k)42 b Fl(the)30 +b(column)f(index)h(of)f(the)h(lo)r(cal)f(dense)g(matrix)g +Fh(B)t(LC)6 b(K)g Fl(,)30 b(iden)n(tifying)f(the)h(\014rst)498 +3885 y(column)e(of)f(the)h(submatrix)f Fh(B)t(LC)6 b(K)g +Fl(.)498 3985 y(Scop)r(e:)37 b Fm(lo)s(cal)498 4085 y +Fl(T)n(yp)r(e:)g Fm(optional)498 4184 y Fl(Default:)h +Fh(j)5 b(bl)r(ck)25 b Fl(=)d(1.)498 4284 y(Sp)r(eci\014ed)28 +b(as:)37 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(bl)r(ck)25 b Fg(\025)e Fl(1.)291 4446 y Fm(On)31 b(Return)291 +4608 y(x)41 b Fl(the)35 b(lo)r(cal)e(p)r(ortion)h(of)g(global)f(dense)h +(matrix)f Fh(X)7 b Fl(.)56 b(This)34 b(subroutine)g(computes)g(the)498 +4707 y(lo)r(cation)23 b(of)g(the)h(\014rst)g(elemen)n(t)f(of)h(the)g +(lo)r(cal)f(subarra)n(y)e(to)i(insert,)h(based)f(on)g +Fh(ix;)14 b(j)5 b(x)498 4807 y Fl(and)28 b Fh(matr)r(ix)p +927 4807 V 30 w(data)g Fl(\014eld)g(of)f Fh(decomp)p +1693 4807 V 30 w(data)p Fl(.)498 4907 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 5006 y Fl(T)n(yp)r(e:)g Fm(required)1681 +5255 y Fl(53)p eop +%%Page: 54 54 +54 53 bop 946 523 a Fl(Sp)r(eci\014ed)41 b(as:)62 b(a)39 +b(t)n(w)n(o)h(dimensional)g(arra)n(y)d(con)n(taining)j(n)n(um)n(b)r +(ers)f(of)h(the)h(t)n(yp)r(e)946 623 y(indicated)28 b(in)g(T)-7 +b(able)27 b(3.8.)739 789 y Fm(ierrv)42 b Fl(error)25 +b(v)n(ector)i(used)g(in)h(successiv)n(e)e(call)h(to)h(F90)p +2500 789 25 4 v 29 w(PSDSINS)g(and)g(F90)p 3207 789 V +29 w(D)n(ASB.)946 888 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +988 y Fl(T)n(yp)r(e:)g Fm(required)946 1088 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +1362 y Fe(Usage)46 b(Notes)840 1544 y Fl(1.)41 b(Before)26 +b(call)f(this)i(subroutine)e(y)n(ou)h(m)n(ust)g(allo)r(cate)f +Fh(X)32 b Fl(with)27 b(F90)p 3022 1544 V 29 w(PSDSALL)f(sub-)946 +1644 y(routine.)2130 5255 y(54)p eop +%%Page: 55 55 +55 54 bop 291 736 a Fe(F90)p 518 736 41 4 v 48 w(PSDSASB|Assem)l(bly)44 +b(Dense)i(Matrix)291 981 y Fl(This)27 b(subroutine)g(assem)n(blies)g +(global)f(dense)i(matrix)f Fh(X)7 b Fl(.)p 907 1099 1632 +4 v 957 1169 a Fh(X)916 b Fm(Subroutine)p 907 1202 V +957 1272 a Fl(In)n(teger)730 b(F90)p 2085 1272 25 4 v +30 w(PSDSASB)957 1371 y(Long)26 b(Precision)g(Real)257 +b(F90)p 2085 1371 V 30 w(PSDSASB)957 1471 y(Long)26 b(Precision)g +(Complex)99 b(F90)p 2085 1471 V 30 w(PSDSASB)p 907 1504 +1632 4 v 1326 1736 a(T)-7 b(able)28 b(3.9:)36 b(Data)27 +b(t)n(yp)r(es)291 2108 y Fe(Syn)l(tax)892 2290 y Fl(CALL)h(F90)p +1289 2290 25 4 v 29 w(PSDSASB)g(\()p Ff(x,)i(ierrv,)h(de)l(c)l(omp)p +2340 2290 26 4 v 31 w(data)p Fl(\))291 2489 y Fm(On)g(En)m(try)291 +2654 y(x)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)h(of)f(global)g(dense)g +(matrix)g Fh(X)7 b Fl(.)498 2754 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +2853 y Fl(T)n(yp)r(e:)g Fm(required)498 2953 y Fl(Sp)r(eci\014ed)31 +b(as:)41 b(a)30 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(with)k(the)f +(POINTER)f(attribute)i(con-)498 3053 y(taining)c(n)n(um)n(b)r(ers)h(of) +f(the)h(t)n(yp)r(e)g(indicated)g(in)g(T)-7 b(able)27 +b(3.9.)291 3217 y Fm(decomp)p 618 3217 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)498 +3317 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3416 y Fl(T)n(yp)r(e:)g +Fm(required)498 3516 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 3681 y Fm(ierrv)41 b Fl(error)31 b(v)n(ector.)50 +b(It)33 b(m)n(ust)f(b)r(e)h(initialized)g(b)n(y)f(F90)p +2110 3681 25 4 v 29 w(PSDSALL)h(and)f(m)n(ust)h(b)r(e)g(the)498 +3780 y(same)27 b(used)h(in)g(F90)p 1136 3780 V 29 w(PSDSINS.)498 +3880 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3980 y Fl(T)n(yp)r(e:)g +Fm(required)498 4079 y Fl(Sp)r(eci\014ed)31 b(as:)43 +b(a)30 b(one)g(dimensional)g(arra)n(y)e(of)j(in)n(teger.)45 +b(m)n(ust)31 b(b)r(e)g(the)g(same)f(used)498 4179 y(in)e(F90)p +738 4179 V 29 w(PSDSINS.)498 4278 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4378 y Fl(T)n(yp)r(e:)g Fm(optional)498 4478 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(one)g(dimensional)g(arra)n(y)e(of)j(in)n(teger.)291 +4642 y Fm(On)j(Return)291 4807 y(x)41 b Fl(the)28 b(lo)r(cal)f(p)r +(ortion)h(of)f(global)g(dense)g(matrix)g Fh(X)7 b Fl(.)498 +4907 y(Scop)r(e:)37 b Fm(lo)s(cal)498 5006 y Fl(T)n(yp)r(e:)g +Fm(required)1681 5255 y Fl(55)p eop +%%Page: 56 56 +56 55 bop 946 523 a Fl(Sp)r(eci\014ed)31 b(as:)41 b(a)30 +b(rank)f(one)h(or)f(t)n(w)n(o)g(arra)n(y)f(with)i(the)h(POINTER)e +(attribute)h(con-)946 623 y(taining)e(n)n(um)n(b)r(ers)f(of)g(the)h(t)n +(yp)r(e)g(indicated)g(in)g(T)-7 b(able)27 b(3.8.)739 +789 y Fm(ierrv)42 b Fl(error)22 b(v)n(ector)h(used)h(in)h(successiv)n +(e)e(call)g(to)i(F90)p 2477 789 25 4 v 29 w(PSDSINS)g(and)f(F90)p +3177 789 V 29 w(PSDSASB.)946 888 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +988 y Fl(T)n(yp)r(e:)g Fm(required)946 1088 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +1362 y Fe(Usage)46 b(Notes)840 1544 y Fl(1.)41 b(Before)26 +b(call)f(this)i(subroutine)e(y)n(ou)h(m)n(ust)g(allo)r(cate)f +Fh(X)32 b Fl(with)27 b(F90)p 3022 1544 V 29 w(PSDSALL)f(sub-)946 +1644 y(routine.)2130 5255 y(56)p eop +%%Page: 57 57 +57 56 bop 291 715 a Fe(F90)p 518 715 41 4 v 48 w(PSDSFREE|F)-11 +b(ree)45 b(Dense)h(Matrix)291 939 y Fl(These)27 b(subroutine)g(deallo)r +(cates)g(dense)g(matrix's)g(structures.)p 876 1030 1694 +4 v 926 1099 a Fh(A)924 b Fm(Subroutine)p 876 1133 V +926 1202 a Fl(In)n(teger)731 b(F90)p 2055 1202 25 4 v +29 w(PSDSFREE)926 1302 y(Long)27 b(Precision)f(Real)257 +b(F90)p 2055 1302 V 29 w(PSDSFREE)926 1402 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2055 1402 V 29 w(PSDSFREE)p +876 1435 1694 4 v 1306 1667 a(T)-7 b(able)27 b(3.10:)36 +b(Data)27 b(t)n(yp)r(es)291 2040 y Fe(Syn)l(tax)963 2222 +y Fl(CALL)h(F90)p 1360 2222 25 4 v 29 w(PSDSFREE)f(\()p +Ff(X,)j(de)l(c)l(omp)p 2268 2222 26 4 v 32 w(data)p Fl(\))291 +2421 y Fm(On)h(En)m(try)291 2575 y(x)41 b Fl(the)28 b(lo)r(cal)f(p)r +(ortion)h(of)f(global)g(dense)g(matrix)g Fh(X)7 b Fl(.)498 +2675 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2774 y Fl(T)n(yp)r(e:)g +Fm(required)498 2874 y Fl(Sp)r(eci\014ed)31 b(as:)41 +b(a)30 b(rank)f(one)g(or)g(t)n(w)n(o)h(arra)n(y)d(with)k(the)f(POINTER) +f(attribute)i(con-)498 2974 y(taining)c(n)n(um)n(b)r(ers)h(of)f(the)h +(t)n(yp)r(e)g(indicated)g(in)g(T)-7 b(able)27 b(3.9.)291 +3127 y Fm(decomp)p 618 3127 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)498 3227 +y(Scop)r(e:)37 b Fm(lo)s(cal)498 3327 y Fl(T)n(yp)r(e:)g +Fm(required)498 3426 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 3580 y Fm(On)k(Return)291 3734 y(x)41 +b Fl(deallo)r(cated)27 b(lo)r(cal)g(p)r(ortion)g(of)h(global)e(dense)i +(matrix)f Fh(X)7 b Fl(.)498 3834 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3934 y Fl(T)n(yp)r(e:)g Fm(required)498 4033 y Fl(Sp)r(eci\014ed)k(as:) +61 b(a)40 b(t)n(w)n(o)g(dimensional)f(arra)n(y)f(con)n(taining)h(n)n +(um)n(b)r(ers)h(of)g(the)h(t)n(yp)r(e)498 4133 y(indicated)28 +b(in)g(T)-7 b(able)27 b(3.10)291 4402 y Fe(Usage)46 b(Notes)392 +4584 y Fl(1.)41 b(Before)28 b(call)h(this)g(subroutine)f(y)n(ou)g(m)n +(ust)h(allo)r(cate)f Fh(x)i Fl(with)f(F90)p 2571 4584 +25 4 v 29 w(PSDSALL)g(sub-)498 4684 y(routine.)392 4838 +y(2.)41 b(These)31 b(pro)r(cedures)g(cannot)g(b)r(e)h(executed)f(if)h +(related)f(sparse)f(matrix)h(descriptor)498 4937 y(has)c(b)r(een)h +(already)e(deallo)r(cated)h(with)h(F90)p 1903 4937 V +29 w(PSDSCFREE)g(call.)1681 5255 y(57)p eop +%%Page: 58 58 +58 57 bop 739 739 a Fe(F90)p 966 739 41 4 v 48 w(PSSPREINIT)45 +b(|Reinitialize)j(Global)e(Sparse)739 888 y(Matrix)739 +1137 y Fl(This)24 b(subroutine)g(reinitializes)f(global)g(sparse)g +(matrix)h Fh(A)p Fl(;)h(the)g(user)f(planning)f(to)i(regen-)739 +1236 y(erate)k(a)g(matrix)h(with)g(exactly)f(the)i(same)e(nonzero)g +(pattern)g(should)h(call)f(this)i(routine,)739 1336 y(then)d +Fj(F90_PSSPUPD)23 b Fl(instead)28 b(of)f Fj(F90_PSSPINS)p +Fl(.)p 1292 1447 1758 4 v 1342 1516 a Fh(A)924 b Fm(Subroutine)p +1292 1550 V 1342 1619 a Fl(Long)27 b(Precision)f(Real)257 +b(F90)p 2471 1619 25 4 v 29 w(PSSPREINIT)1342 1719 y(Long)27 +b(Precision)f(Complex)99 b(F90)p 2471 1719 V 29 w(PSSPREINIT)p +1292 1752 1758 4 v 1754 1984 a(T)-7 b(able)27 b(3.11:)36 +b(Data)27 b(t)n(yp)r(es)739 2367 y Fe(Syn)l(tax)1275 +2548 y Fl(CALL)h(F90)p 1672 2548 25 4 v 29 w(PSSPREINIT)f(\()p +Ff(a,)k(ierrv,)g(de)l(c)l(omp)p 2853 2548 26 4 v 31 w(data)p +Fl(\))739 2748 y Fm(On)g(En)m(try)739 2914 y(a)42 b Fl(see)27 +b(\\On)g(Return".)739 3080 y Fm(decomp)p 1066 3080 29 +4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 3179 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3279 y Fl(T)n(yp)r(e:)g Fm(required)946 3379 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3545 y Fm(ierrv)42 b Fl(see)27 b(\\On)g(Return".)739 +3711 y Fm(On)k(Return)739 3877 y(a)42 b Fl(allo)r(cated)27 +b(lo)r(cal)g(p)r(ortion)g(of)g(global)g(sparse)f(matrix)h +Fh(A)p Fl(.)946 3976 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4076 y Fl(T)n(yp)r(e:)g Fm(required)946 4176 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4342 y Fm(ierrv)42 b Fl(error)25 b(v)n(ector)i(used) +g(in)h(subroutines)f(F90)p 2306 4342 25 4 v 29 w(PSSPUPD)g(and)h(F90)p +3050 4342 V 29 w(PSSP)-7 b(ASB.)946 4441 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 4541 y Fl(T)n(yp)r(e:)g Fm(required)946 +4641 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(one)g(dimensional)g(arra) +n(y)f(of)h(in)n(teger.)2130 5255 y(58)p eop +%%Page: 59 59 +59 58 bop 291 754 a Fe(F90)p 518 754 41 4 v 48 w(PSSPUPD|Up)t(date)45 +b(Sparse)g(Matrix)291 1018 y Fl(This)e(subroutine)h(up)r(dates)g(the)g +(sparse)e(matrix)h Fh(A)h Fl(with)h(the)f(lo)r(cal)f(sparse)f(matrix) +291 1117 y Fh(B)t(LC)6 b(K)g Fl(.)61 b(This)36 b(assumes)e(that)j +(there)e(has)h(b)r(een)g(a)f(previous)g(call)g(to)h(F90)p +2765 1117 25 4 v 29 w(PSSP)-7 b(ASB)291 1217 y(with)34 +b(argumen)n(t)e Fh(U)9 b(P)45 b Fl(=)33 b Fh(Y)19 b Fl(.)55 +b(Moreo)n(v)n(er,)33 b(for)g(e\016ciency)g(reasons,)h(it)g(is)g +(assumed)f(that)291 1317 y(the)h(blo)r(c)n(k)g(en)n(tries)g(in)g(BLCK)g +(are)f(generated)h(in)g(exactly)g(the)h(same)e(order)g(as)h(in)h(the) +291 1416 y(previous)26 b(run)h(using)h(F90)p 1134 1416 +V 29 w(PSSPINS.)p 903 1543 1641 4 v 952 1613 a Fh(A;)14 +b(B)t(LC)6 b(K)627 b Fm(Subroutine)p 903 1646 V 952 1716 +a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2081 1716 +25 4 v 29 w(PSSPUPD)952 1815 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2081 1815 V 29 w(PSSPUPD)p 903 1849 1641 4 v +1306 2081 a(T)-7 b(able)27 b(3.12:)36 b(Data)27 b(t)n(yp)r(es)291 +2479 y Fe(Syn)l(tax)667 2664 y Fl(CALL)h(F90)p 1064 2664 +25 4 v 29 w(PSSPUPD)f(\()p Ff(a,)k(ia,)g(ja,)g(blck,)g(ierrv,)g(de)l(c) +l(omp)p 2564 2664 26 4 v 32 w(data)p Fl(\))291 2864 y(CALL)c(F90)p +687 2864 25 4 v 29 w(PSSPUPD)777 2963 y(\()p Ff(a,)k(ia,)g(ja,)g(blck,) +g(ierrv,)g(de)l(c)l(omp)p 1838 2963 26 4 v 31 w(data,)g(IBLCK=iblck,)h +(JBLCK=jblck)p Fl(\))291 3166 y Fm(On)f(En)m(try)291 +3348 y(a)41 b Fl(the)34 b(lo)r(cal)f(p)r(ortion)g(of)h(global)e(sparse) +g(matrix)h Fh(A)p Fl(.)56 b(This)33 b(subroutine)g(computes)h(the)498 +3448 y(lo)r(cation)j(of)h(the)f(\014rst)h(elemen)n(t)f(of)h(the)g(lo)r +(cal)f(submatrix)g(to)g(insert,)j(based)d(on)498 3547 +y Fh(ia;)14 b(j)5 b(a)27 b Fl(and)g Fh(matr)r(ix)p 1146 +3547 25 4 v 31 w(data)h Fl(\014eld)f(of)h Fh(decomp)p +1913 3547 V 30 w(data)p Fl(.)498 3647 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 3746 y Fl(T)n(yp)r(e:)g Fm(required)498 +3846 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)291 4028 +y Fm(ia)41 b Fl(the)f(ro)n(w)e(index)h(of)g(global)g(sparse)f(matrix)g +Fh(A)p Fl(,)43 b(iden)n(tifying)c(the)h(\014rst)f(ro)n(w)f(of)i(the)498 +4127 y(submatrix)27 b Fh(A)p Fl(.)498 4227 y(Scop)r(e:)37 +b Fm(global)498 4327 y Fl(T)n(yp)r(e:)g Fm(required)498 +4426 y Fl(Sp)r(eci\014ed)20 b(as:)32 b(an)19 b(in)n(teger)f(v)-5 +b(ariable)19 b(1)j Fg(\024)h Fh(ia)f Fg(\024)h Fh(decomp)p +2258 4426 V 30 w(data)p Fl(\045)p Fh(matr)r(ix)p 2780 +4426 V 30 w(data)p Fl([)p Fh(M)p 3084 4426 V 39 w Fl(].)291 +4608 y Fm(ja)42 b Fl(the)26 b(column)h(index)f(of)g(the)h(global)e +(sparse)g(matrix)h Fh(A)p Fl(,)h(iden)n(tifying)f(the)h(\014rst)f +(column)498 4707 y(of)i(the)g(submatrix)f Fh(A)p Fl(.)498 +4807 y(Scop)r(e:)37 b Fm(global)498 4907 y Fl(T)n(yp)r(e:)g +Fm(required)498 5006 y Fl(Sp)r(eci\014ed)21 b(as:)32 +b(an)20 b(in)n(teger)f(v)-5 b(ariable)19 b(1)k Fg(\024)f +Fh(j)5 b(a)23 b Fg(\024)g Fh(decomp)p 2272 5006 V 29 +w(data)p Fl(\045)p Fh(matr)r(ix)p 2793 5006 V 31 w(data)p +Fl([)p Fh(N)p 3084 5006 V 39 w Fl(].)1681 5255 y(59)p +eop +%%Page: 60 60 +60 59 bop 739 523 a Fm(blc)m(k)42 b Fl(the)28 b(lo)r(cal)f(submatrix)g +(to)h(insert.)946 623 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +722 y Fl(T)n(yp)r(e:)g Fm(required)946 822 y Fl(Sp)r(eci\014ed)f(as:)50 +b(a)35 b(structured)f(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)g Fl(1;)j(supp)r(orted)c(storage)946 922 y(formats)27 +b(are:)36 b Fj(CSR)p Fl(.)739 1107 y Fm(decomp)p 1066 +1107 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for) +g(comm)n(unications.)946 1207 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +1307 y Fl(T)n(yp)r(e:)g Fm(required)946 1406 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 1592 y Fm(ierrv)42 b Fl(error)25 b(v)n(ector.)36 +b(It)28 b(m)n(ust)g(b)r(e)g(initialized)f(b)n(y)h(F90)p +2515 1592 25 4 v 29 w(PSSP)-7 b(ALL.)946 1692 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 1791 y Fl(T)n(yp)r(e:)g Fm(required)946 +1891 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(one)g(dimensional)g(arra) +n(y)f(of)h(in)n(teger.)739 2077 y Fm(iblc)m(k)42 b Fl(the)29 +b(ro)n(w)f(index)i(of)f(lo)r(cal)f(sparse)g(matrix)h +Fh(B)t(LC)6 b(K)g Fl(,)29 b(iden)n(tifying)h(the)f(\014rst)g(ro)n(w)f +(of)946 2177 y(the)g(submatrix)f Fh(B)t(LC)6 b(K)g Fl(.)946 +2276 y(Scop)r(e:)37 b Fm(lo)s(cal)946 2376 y Fl(T)n(yp)r(e:)g +Fm(optional)946 2476 y Fl(Default:)h Fh(ibl)r(ck)25 b +Fl(=)d(1.)946 2575 y(Sp)r(eci\014ed)29 b(as:)36 b(an)27 +b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 b(bl)r(ck)25 +b Fg(\025)e Fl(1.)739 2761 y Fm(jblc)m(k)42 b Fl(the)28 +b(column)f(index)h(of)f(the)h(lo)r(cal)f(sparse)f(matrix)g +Fh(B)t(LC)6 b(K)g Fl(,)27 b(iden)n(tifying)h(the)f(\014rst)946 +2861 y(column)h(of)g(the)g(submatrix)f Fh(B)t(LC)6 b(K)g +Fl(.)946 2960 y(Scop)r(e:)37 b Fm(lo)s(cal)946 3060 y +Fl(T)n(yp)r(e:)g Fm(optional)946 3160 y Fl(Default:)h +Fh(j)5 b(bl)r(ck)25 b Fl(=)e(1.)946 3259 y(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable)26 b Fh(j)5 +b(bl)r(ck)25 b Fg(\025)e Fl(1.)739 3445 y Fm(On)31 b(Return)739 +3631 y(a)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(global)g(sparse) +f(matrix)h Fh(A)h Fl(up)r(dated)g(with)g(matrix)f Fh(B)t(LC)6 +b(K)g Fl(.)946 3731 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3830 y Fl(T)n(yp)r(e:)g Fm(required)946 3930 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4116 y Fm(ierrv)42 b Fl(error)24 b(v)n(ector)i(used) +g(in)h(successiv)n(e)e(call)h(to)h(F90)p 2493 4116 V +29 w(PSSPINS)f(and)g(F90)p 3190 4116 V 30 w(PSSP)-7 b(ASB.)946 +4215 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4315 y Fl(T)n(yp)r(e:)g +Fm(required)946 4415 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(one)g(dimensional)g(arra)n(y)f(of)h(in)n(teger.)739 +4716 y Fe(Usage)46 b(Notes)840 4907 y Fl(1.)41 b(Before)22 +b(call)g(this)g(subroutine)g(y)n(ou)g(m)n(ust)g(reinitialize)g +Fh(A)g Fl(with)h(F90)p 3074 4907 V 29 w(PSSPREINIT)946 +5006 y(subroutine.)2130 5255 y(60)p eop +%%Page: 61 61 +61 60 bop 392 523 a Fl(2.)41 b(This)29 b(Routine)f(can)h(b)r(e)g +(called)f(a)g(n)n(um)n(b)r(er)g(of)h(di\013eren)n(t)g(times)f(o)n(v)n +(er)f(all)h(pro)r(cesses)498 623 y(b)r(ecause)g(do)r(esn't)h(con)n +(tain)f(an)n(y)f(syncronization)g(p)r(oin)n(ts.)40 b(If)28 +b(an)h(error)d(is)i(encoun-)498 722 y(tered,)g(it)g(will)f(b)r(e)h +(displa)n(y)n(ed)f(in)h(psdspasb)f(routine.)1681 5255 +y(61)p eop +%%Page: 62 62 +62 61 bop 739 756 a Fe(F90)p 966 756 41 4 v 48 w(PSCSRP|Righ)l(t)47 +b(P)l(erum)l(tation)f(of)f(Sparse)739 905 y(Matrix)739 +1169 y Fl(This)33 b(subroutine)f(up)r(dates)h(the)g(sparse)e(matrix)h +Fh(A)h Fl(with)h(the)f(p)r(erm)n(utation)f(matrix)g Fh(P)739 +1269 y Fl(or)27 b(its)g(transp)r(ose)g Fh(P)1394 1239 +y Fc(T)1446 1269 y Fl(,)h(as:)1982 1381 y Fh(A)23 b Fg( )g +Fh(A)c Fg(\001)f Fh(P)p 1382 1555 1579 4 v 1431 1624 +a(A)924 b Fm(Subroutine)p 1382 1657 V 1431 1727 a Fl(Long)27 +b(Precision)f(Real)257 b(F90)p 2560 1727 25 4 v 29 w(PSCSRP)1431 +1827 y(Long)27 b(Precision)f(Complex)99 b(F90)p 2560 +1827 V 29 w(PSCSRP)p 1382 1860 1579 4 v 1754 2092 a(T)-7 +b(able)27 b(3.13:)36 b(Data)27 b(t)n(yp)r(es)739 2509 +y Fe(Syn)l(tax)1230 2694 y Fl(CALL)h(F90)p 1627 2694 +25 4 v 29 w(PSCSRP)f(\()p Ff(tr)l(ans,)j(ip)l(erm,)h(a,)f(de)l(c)l(omp) +p 2898 2694 26 4 v 32 w(data)p Fl(\))739 2897 y Fm(On)h(En)m(try)739 +3080 y(trans)42 b Fl(Whether)28 b(the)g(p)r(erm)n(utation)f(or)g(its)h +(transp)r(ose)e(should)h(b)r(e)h(used.)946 3180 y(Scop)r(e:)37 +b Fm(global)946 3279 y Fl(T)n(yp)r(e:)g Fm(required)946 +3379 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(c)n(haracter)f(v)-5 +b(ariable)26 b Fj(N)h Fl(or)g Fj(T)p Fl(.)739 3562 y +Fm(ip)s(erm)39 b Fl(The)28 b(p)r(erm)n(utation)f(to)h(b)r(e)g(applied,) +f(stored)g(in)h(v)n(ector)e(form.)946 3661 y(Scop)r(e:)37 +b Fm(global)946 3761 y Fl(T)n(yp)r(e:)g Fm(required)946 +3860 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(in)n(teger)g +(arra)n(y)-7 b(.)739 4043 y Fm(a)42 b Fl(the)28 b(lo)r(cal)f(p)r +(ortion)g(of)g(global)g(sparse)f(matrix)h Fh(A)p Fl(.)946 +4143 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4242 y Fl(T)n(yp)r(e:)g +Fm(required)946 4342 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 4525 y Fm(decomp)p 1066 4525 29 4 v +33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 4624 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4724 y Fl(T)n(yp)r(e:)g Fm(required)946 4824 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 5006 y Fm(On)k(Return)2130 5255 y +Fl(62)p eop +%%Page: 63 63 +63 62 bop 291 523 a Fm(a)41 b Fl(the)29 b(lo)r(cal)f(p)r(ortion)f(of)h +(global)f(sparse)g(matrix)h Fh(A)p Fl(,)g(with)h(its)f(column)g +(indices)g(up)r(dated)498 623 y(according)e(to)h(the)h(sp)r(eci\014ed)g +(p)r(erm)n(utation.)498 722 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +822 y Fl(T)n(yp)r(e:)g Fm(required)498 922 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 1196 y Fe(Usage)46 b(Notes)392 1378 +y Fl(1.)41 b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)e(b) +r(efore)i(calls)f(to)g(this)h(subroutine.)392 1544 y(2.)41 +b(See)28 b(also)e Fj(F90_PSDSCREN)d Fl(for)k(a)g(usage)g(example.)1681 +5255 y(63)p eop +%%Page: 64 64 +64 63 bop 739 756 a Fe(F90)p 966 756 41 4 v 48 w(PSGELP|Left)46 +b(P)l(erm)l(utation)g(of)f(Dense)739 905 y(Matrix)739 +1169 y Fl(This)25 b(subroutine)f(up)r(dates)h(the)g(dense)f(matrix)h +Fh(X)31 b Fl(with)25 b(the)g(p)r(erm)n(utation)f(matrix)h +Fh(P)36 b Fl(or)739 1269 y(its)28 b(transp)r(ose)e Fh(P)1292 +1239 y Fc(T)1344 1269 y Fl(,)i(as:)1969 1381 y Fh(X)h +Fg( )23 b Fh(P)30 b Fg(\001)19 b Fh(X)p 1378 1555 1586 +4 v 1428 1624 a(X)917 b Fm(Subroutine)p 1378 1657 V 1428 +1727 a Fl(Long)27 b(Precision)f(Real)257 b(F90)p 2557 +1727 25 4 v 29 w(PSGELP)1428 1827 y(Long)27 b(Precision)f(Complex)99 +b(F90)p 2557 1827 V 29 w(PSGELP)p 1378 1860 1586 4 v +1754 2092 a(T)-7 b(able)27 b(3.14:)36 b(Data)27 b(t)n(yp)r(es)739 +2509 y Fe(Syn)l(tax)1229 2694 y Fl(CALL)h(F90)p 1626 +2694 25 4 v 29 w(PSGELP)e(\()p Ff(tr)l(ans,)k(ip)l(erm,)h(x,)e(de)l(c)l +(omp)p 2899 2694 26 4 v 32 w(data)p Fl(\))739 2897 y +Fm(On)i(En)m(try)739 3080 y(trans)42 b Fl(Whether)28 +b(the)g(p)r(erm)n(utation)f(or)g(its)h(transp)r(ose)e(should)h(b)r(e)h +(used.)946 3180 y(Scop)r(e:)37 b Fm(global)946 3279 y +Fl(T)n(yp)r(e:)g Fm(required)946 3379 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(c)n(haracter)f(v)-5 b(ariable)26 b +Fj(N)h Fl(or)g Fj(T)p Fl(.)739 3562 y Fm(ip)s(erm)39 +b Fl(The)28 b(p)r(erm)n(utation)f(to)h(b)r(e)g(applied,)f(stored)g(in)h +(v)n(ector)e(form.)946 3661 y(Scop)r(e:)37 b Fm(global)946 +3761 y Fl(T)n(yp)r(e:)g Fm(required)946 3860 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(rank)g(one)g(in)n(teger)g(arra)n(y)-7 +b(.)739 4043 y Fm(x)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g +(global)g(dense)g(matrix)g Fh(X)7 b Fl(.)946 4143 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 4242 y Fl(T)n(yp)r(e:)g Fm(required)946 +4342 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(or)g(t)n(w)n +(o)f(arra)n(y)g(of)h(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(table)f +(3.14.)739 4525 y Fm(decomp)p 1066 4525 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)946 +4624 y(Scop)r(e:)37 b Fm(lo)s(cal)946 4724 y Fl(T)n(yp)r(e:)g +Fm(required)946 4824 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 5006 y Fm(On)k(Return)2130 5255 y Fl(64)p +eop +%%Page: 65 65 +65 64 bop 291 523 a Fm(x)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)h(of)f +(global)g(dense)g(matrix)g Fh(X)7 b Fl(.)498 623 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 722 y Fl(T)n(yp)r(e:)g Fm(required)498 +822 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)f(one)i(or)e(t)n(w)n +(o)h(arra)n(y)e(of)j(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g(table)f +(3.14.)291 1096 y Fe(Usage)46 b(Notes)392 1278 y Fl(1.)41 +b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)e(b)r(efore)i +(calls)f(to)g(this)h(subroutine.)392 1444 y(2.)41 b(See)28 +b(also)e Fj(F90_PSDSCREN)d Fl(for)k(a)g(usage)g(example.)1681 +5255 y(65)p eop +%%Page: 66 66 +66 65 bop 739 733 a Fe(F90)p 966 733 41 4 v 48 w(PSDSCREN|Ren)l(um)l(b) +t(ering)47 b(of)739 882 y(Comm)l(unication)f(Descriptors)739 +1125 y Fl(This)28 b(subroutine)g(up)r(dates)g(the)h(comm)n(unication)e +(descriptor)g Fh(decomp)p 3043 1125 25 4 v 30 w(data)h +Fl(according)739 1224 y(to)f(the)h(p)r(erm)n(utation)g(matrix)f +Fh(P)39 b Fl(or)27 b(its)h(transp)r(ose)e Fh(P)2476 1194 +y Fc(T)2528 1224 y Fl(.)739 1498 y Fe(Syn)l(tax)1216 +1680 y Fl(CALL)i(F90)p 1613 1680 V 29 w(PSDSCREN)f(\()p +Ff(tr)l(ans,)j(ip)l(erm,)h(de)l(c)l(omp)p 2912 1680 26 +4 v 32 w(data)p Fl(\))739 1879 y Fm(On)g(En)m(try)739 +2042 y(trans)42 b Fl(Whether)28 b(the)g(p)r(erm)n(utation)f(or)g(its)h +(transp)r(ose)e(should)h(b)r(e)h(used.)946 2142 y(Scop)r(e:)37 +b Fm(global)946 2241 y Fl(T)n(yp)r(e:)g Fm(required)946 +2341 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(c)n(haracter)f(v)-5 +b(ariable)26 b Fj(N)h Fl(or)g Fj(T)p Fl(.)739 2504 y +Fm(ip)s(erm)39 b Fl(The)28 b(p)r(erm)n(utation)f(to)h(b)r(e)g(applied,) +f(stored)g(in)h(v)n(ector)e(form.)946 2604 y(Scop)r(e:)37 +b Fm(global)946 2703 y Fl(T)n(yp)r(e:)g Fm(required)946 +2803 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(in)n(teger)g +(arra)n(y)-7 b(.)739 2966 y Fm(decomp)p 1066 2966 29 +4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 3066 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3165 y Fl(T)n(yp)r(e:)g Fm(required)946 3265 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3428 y Fm(On)k(Return)739 3591 y(decomp)p +1066 3591 V 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for) +g(comm)n(unications.)946 3691 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3790 y Fl(T)n(yp)r(e:)g Fm(required)946 3890 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4163 y Fe(Usage)46 b(Notes)840 4345 +y Fl(1.)41 b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)f(b) +r(efore)g(calls)g(to)g(this)h(subroutine.)840 4508 y(2.)41 +b(The)36 b(p)r(erm)n(utation)g(routines)f(are)g(de\014ned)h(to)g +(handle)g(item)g(ren)n(um)n(b)r(ering)f(that)946 4608 +y(ma)n(y)h(come)f(ab)r(out)h(b)r(ecause)f(of)h(lo)r(cal)f(data)h +(storage)e(issues.)61 b(If)36 b(the)h(particular)946 +4707 y(data)30 b(structure)g(c)n(hosen)f(for)h(the)g(sparse)f(matrix)h +Fh(A)g Fl(en)n(tails)g(a)g(ro)n(w)f(p)r(erm)n(utation)946 +4807 y(stored)37 b(in)g Fj(A\045PL)p Fl(,)f(eac)n(h)g(application)h(of) +g(the)h(matrix-v)n(ector)c(pro)r(duct)k Fh(y)j Fg( )e +Fh(Ax)946 4907 y Fl(ma)n(y)32 b(b)r(e)h(slo)n(w)n(ed)e(do)n(wn)g(b)n(y) +h(the)h(need)g(to)f(apply)g(it)g(to)h(reco)n(v)n(er)c(the)k(natural)e +(ro)n(w)946 5006 y(ordering)g(in)h(the)h(result)e(v)n(ector)g +Fh(y)s Fl(.)50 b(The)32 b(follo)n(wing)f(co)r(de)h(fragmen)n(t)f(sho)n +(ws)g(ho)n(w)2130 5255 y(66)p eop +%%Page: 67 67 +67 66 bop 498 523 a Fl(to)32 b(un)n(wind)f(the)h(p)r(erm)n(utation)f +(up)r(on)h(en)n(tering)f(the)h(iteration)f(lo)r(op)g(and)g(ho)n(w)g(to) +498 623 y(restore)26 b(natural)h(ordering)f(up)r(on)i(exit:)542 +822 y Fj(do_renum_left)38 b(=)43 b(\(a\045pl\(1\))d(/=)j(0\))542 +922 y(if)f(\(do_renum_left\))c(then)672 1021 y(ipnull\(1\))i(=)k(0)672 +1121 y(do)f(i=1,)f(n_row)760 1220 y(iperm\(i\))84 b(=)43 +b(a\045pl\(i\))672 1320 y(enddo)672 1420 y(do)g(i=n_row+1,n_col)760 +1519 y(iperm\(i\))84 b(=)43 b(i)672 1619 y(enddo)672 +1719 y(ipsave)f(=>)g(a\045pl)672 1818 y(a\045pl)130 b(=>)42 +b(ipnull)672 1918 y(call)g(f90_psdscren\('T',)o(ipe)o(rm)o(,d)o(eco)o +(mp)o(_da)o(ta)o(\))672 2017 y(call)g(f90_pscsrp\('N',ip)o(erm)o(,a)o +(,d)o(eco)o(mp)o(_da)o(ta)o(\))672 2117 y(call)g(f90_psgelp\('T',ip)o +(erm)o(,x)o(,d)o(eco)o(mp)o(_da)o(ta)o(\))672 2217 y(call)g +(f90_psgelp\('T',ip)o(erm)o(,b)o(,d)o(eco)o(mp)o(_da)o(ta)o(\))542 +2316 y(endif)542 2416 y(do)g(while\(convergence)o(\))585 +2516 y(......)542 2615 y(end)g(do)542 2715 y(if)g(\(do_renum_left\))c +(then)629 2814 y(call)k(f90_psdscren\('N')o(,i)o(per)o(m,)o(de)o(com)o +(p_)o(dat)o(a\))629 2914 y(call)g(f90_pscsrp\('T',i)o(pe)o(rm,)o(a,)o +(de)o(com)o(p_)o(dat)o(a\))629 3014 y(call)g(f90_psgelp\('N',i)o(pe)o +(rm,)o(x,)o(de)o(com)o(p_)o(dat)o(a\))629 3113 y(call)g +(f90_psgelp\('N',i)o(pe)o(rm,)o(b,)o(de)o(com)o(p_)o(dat)o(a\))629 +3213 y(a\045pl)85 b(=>)43 b(ipsave)542 3313 y(endif)1681 +5255 y Fl(67)p eop +%%Page: 68 68 +68 67 bop 739 752 a Fe(F90)p 966 752 41 4 v 48 w(PSVERIFY|V)-11 +b(erify)45 b(correctness)g(of)739 902 y(comm)l(unication)h(descriptor) +739 1163 y Fl(This)32 b(subroutine)g(c)n(hec)n(ks)f(the)i(lo)r(cal)f +(and)g(global)f(correctness)g(of)h(the)h(comm)n(unication)739 +1263 y(descriptor,)26 b(p)r(ossibly)g(reordering)f(the)i(comm)n +(unication)f(patterns)g(to)h(a)n(v)n(oid)e(deadlo)r(c)n(ks.)739 +1362 y(It)j(also)e(v)n(eri\014es)h(the)h(correctness)d(of)j(the)g +(matrix)f(structure.)739 1655 y Fe(Syn)l(tax)1143 1843 +y Fl(CALL)h(F90)p 1540 1843 25 4 v 29 w(PSVERIFY)g(\()p +Ff(d,a,de)l(c)l(omp)p 2454 1843 26 4 v 33 w(data,che)l(ck)p +2856 1843 V 33 w(descr,c)l(onvert)p 3363 1843 V 32 w(descr,)2294 +1943 y(h,de)l(c)l(omp)p 2628 1943 V 33 w(data)p 2815 +1943 V 31 w(out,work,tr)l(ans,unitd)p Fl(\))739 2235 +y Fm(On)j(En)m(try)739 2415 y(d)41 b Fl(The)28 b(main)g(diagonal)e(of)h +(the)h(matrix)f(used)h(for)f(scaling.)946 2514 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 2614 y Fl(T)n(yp)r(e:)g Fm(required)946 +2714 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(real)g(arra) +n(y)-7 b(.)739 2993 y Fm(a)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of) +g(global)g(sparse)f(matrix)h Fh(A)p Fl(.)946 3092 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 3192 y Fl(T)n(yp)r(e:)g Fm(required)946 +3292 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)739 3471 +y Fm(decomp)p 1066 3471 29 4 v 33 w(data)42 b Fl(con)n(tains)27 +b(data)g(structures)g(for)g(comm)n(unications.)946 3571 +y(Scop)r(e:)37 b Fm(lo)s(cal)946 3670 y Fl(T)n(yp)r(e:)g +Fm(required)946 3770 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)739 3950 y Fm(c)m(hec)m(k)p 970 3950 V 36 +w(descr)42 b Fl(con)n(tains)26 b(sp)r(eci\014cs)i(ab)r(out)f(c)n(hec)n +(ks)g(to)g(b)r(e)h(p)r(erformed.)946 4049 y(Scop)r(e:)37 +b Fm(global)946 4149 y Fl(T)n(yp)r(e:)g Fm(required)946 +4249 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(an)27 b(in)n(teger)g(arra)n(y)e +(of)j(rank)e(one.)739 4428 y Fm(w)m(ork)42 b Fl(a)27 +b(w)n(ork)g(area)946 4528 y(Scop)r(e:)37 b Fm(lo)s(cal)27 +b Fl(T)n(yp)r(e:)37 b Fm(required)28 b Fl(Sp)r(eci\014ed)g(as:)36 +b(a)27 b(rank)g(one)g(in)n(teger)g(arra)n(y)-7 b(.)739 +4707 y Fm(trans)42 b Fl(Whether)28 b(the)g(p)r(erm)n(utation)f(or)g +(its)h(transp)r(ose)e(should)h(b)r(e)h(used.)946 4807 +y(Scop)r(e:)37 b Fm(global)946 4907 y Fl(T)n(yp)r(e:)g +Fm(optional)946 5006 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(c)n(haracter)f(v)-5 b(ariable)26 b Fj(N)h Fl(or)g +Fj(T)p Fl(.)2130 5255 y(68)p eop +%%Page: 69 69 +69 68 bop 291 523 a Fm(unitd)41 b Fl(sp)r(eci\014es)28 +b(whether)h(the)g(diagonal)e(matrix)h(is)h(unit)g(or)f(whether)h(ro)n +(w)e(or)h(column)498 623 y(scaling)f(has)g(to)g(b)r(e)h(p)r(erformed.) +498 722 y(Scop)r(e:)37 b Fm(global)498 822 y Fl(T)n(yp)r(e:)g +Fm(optional)498 922 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 +b(c)n(haracter)e(v)-5 b(ariable)27 b Fj(U)p Fl(,)g Fj(L)p +Fl(,)h Fj(R)f Fl(or)f Fj(B)p Fl(.)291 1087 y Fm(On)31 +b(Return)291 1252 y(con)m(v)m(ert)p 604 1252 29 4 v 35 +w(descr)42 b Fl(sp)r(eci\014es)28 b(what)f(op)r(erations)f(ha)n(v)n(e)h +(b)r(een)h(p)r(erformed.)498 1352 y(Scop)r(e:)37 b Fm(global)498 +1452 y Fl(T)n(yp)r(e:)g Fm(required)498 1551 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 +b(.)291 1717 y Fm(h)41 b Fl(the)28 b(ev)n(en)n(tually)f(con)n(v)n +(erted)f(lo)r(cal)h(p)r(ortion)g(of)h(global)e(sparse)g(matrix)h(.)498 +1816 y(Scop)r(e:)37 b Fm(lo)s(cal)498 1916 y Fl(T)n(yp)r(e:)g +Fm(required)28 b Fl(Sp)r(eci\014ed)g(as:)36 b(a)27 b(structured)h(data) +f(t)n(yp)r(e)g(sp)r(eci\014ed)h(in)g Fg(x)g Fl(1.)291 +2081 y Fm(decomp)p 618 2081 V 33 w(data)p 833 2081 V +35 w(out)41 b Fl(con)n(tains)18 b(data)g(structures)f(for)h(comm)n +(unications)f(ev)n(en)n(tually)h(mo)r(d-)498 2181 y(i\014ed.)498 +2280 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2380 y Fl(T)n(yp)r(e:)g +Fm(required)498 2480 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 2754 y Fe(Usage)46 b(Notes)392 2936 +y Fl(1.)41 b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)e(b) +r(efore)i(calls)f(to)g(this)h(subroutine.)392 3101 y(2.)41 +b(Di\013eren)n(t)33 b(kind)g(of)f(c)n(hec)n(ks)g(can)g(b)r(e)h(p)r +(erformed)f(dep)r(ending)h(on)f(v)-5 b(alues)33 b(supplied)498 +3201 y(with)28 b(c)n(hec)n(k)p 889 3201 25 4 v 29 w(descr)f(parameter:) +612 3366 y(-)41 b(0-th)23 b(bit)h(:)35 b(if)24 b(1)f(then)g(the)h(comm) +n(unication)f(descriptor)f(is)h(c)n(hec)n(k)n(ed)g(for)g(dead-)681 +3466 y(lo)r(c)n(ks.)612 3598 y(-)41 b(1-th)27 b(bit)h(:)37 +b(if)28 b(1)f(then)i(the)f(lo)r(cal)f(matrix)g(is)g(c)n(hec)n(k)n(ed)g +(for)g(correctness)612 3730 y(-)41 b(2-th)32 b(bit)h(:)46 +b(if)33 b(1)f(then)g(a)g(lo)r(cal)g(consistency)f(c)n(hec)n(k)h(of)g +(the)h(comm)n(unication)681 3830 y(descriptor)26 b(is)i(p)r(erformed.) +612 3962 y(-)41 b(4-th)28 b(bit)h(:)39 b(if)29 b(1)f(then)h(a)f(global) +g(consistency)g(c)n(hec)n(k)f(of)i(the)g(comm)n(unication)681 +4062 y(descriptor)d(is)i(p)r(erformed.)392 4227 y(3.)41 +b(Op)r(eration)e(p)r(erformed)g(b)n(y)g(this)h(routine)f(are)g(sp)r +(eci\014ed)h(in)f(the)h(con)n(v)n(ert)p 2946 4227 V 29 +w(descr)498 4327 y(output)28 b(argumen)n(t:)540 4492 +y(If)18 b(PSVERIFY)h(has)f(found)g(a)g(deadlo)r(c)n(k)g(in)g(comm)n +(unication)g(describ)r(ed)g(in)h(DESC)p 3152 4492 V 30 +w(HALO)681 4592 y(list,)33 b(then)f(0-th)g(bit)g(v)-5 +b(alue)32 b(is)g(set)g(to)f(1,)i(0)e(otherwise.)49 b(If)33 +b(PSVERIFY)e(has)681 4691 y(found)25 b(deadlo)r(c)n(k)g(in)g(comm)n +(unication)g(describ)r(ed)g(in)g(DESC)p 2614 4691 V 30 +w(O)n(VRLAP)g(list,)681 4791 y(then)31 b(1-th)f(bit)i(v)-5 +b(alue)30 b(is)h(set)g(to)f(1,)i(0)e(otherwise.)45 b(If)32 +b(PSVERIFY)e(has)h(con-)681 4890 y(v)n(erted)38 b(input)h(matrix)f(A)h +(to)f(a)g(represen)n(tation)f(supp)r(orted)h(b)n(y)g(PSBLAS)681 +4990 y(then)28 b(1-th)f(bit)h(v)-5 b(alue)28 b(is)f(set)h(to)f(1,)h(0)f +(otherwise.)1681 5255 y(69)p eop +%%Page: 70 70 +70 69 bop 840 523 a Fl(4.)41 b(Up)r(dated)32 b(v)-5 b(alues)31 +b(are)f(pro)n(vided)g(within)i Fh(H)38 b Fl(and)31 b(decomp)p +2845 523 25 4 v 30 w(data)p 3037 523 V 29 w(out)g(output)h(pa-)946 +623 y(rameters)27 b(if)h(sp)r(eci\014ed)g(in)g(con)n(v)n(ert)p +2069 623 V 28 w(descr)f(parameter.)2130 5255 y(70)p eop +%%Page: 71 71 +71 70 bop 291 781 a Fe(PSGLOB)p 850 781 41 4 v 47 w(TO)p +1092 781 V 48 w(LOC|Con)l(v)l(ert)46 b(global)g(to)g(lo)t(cal)291 +930 y(n)l(um)l(b)t(ering)291 1219 y Fl(This)26 b(subroutine)f(accepts)h +(an)f(in)n(teger)g(v)n(ector)g(of)h(arbitrary)e(size)i(con)n(taining)f +(indices)h(in)291 1318 y(the)i(global)e(n)n(um)n(b)r(ering)h(sc)n +(heme,)g(and)h(con)n(v)n(erts)e(them)i(in)n(to)f(lo)r(cal)g(n)n(um)n(b) +r(ering)g(sc)n(heme.)291 1649 y Fe(Syn)l(tax)875 1839 +y Fl(CALL)g(PSGLOB)p 1477 1839 25 4 v 29 w(TO)p 1631 +1839 V 30 w(LOC)g(\()p Ff(x,de)l(c)l(omp)p 2223 1839 +26 4 v 32 w(data,act)p Fl(\))842 2039 y(CALL)h(PSGLOB)p +1445 2039 25 4 v 29 w(TO)p 1599 2039 V 29 w(LOC)f(\()p +Ff(x,y,de)l(c)l(omp)p 2255 2039 26 4 v 33 w(data,act)p +Fl(\))291 2248 y Fm(On)k(En)m(try)291 2456 y(x)41 b Fl(An)29 +b(in)n(teger)d(v)n(ector)h(of)g(indices)h(to)f(b)r(e)h(con)n(v)n +(erted.)498 2556 y(Scop)r(e:)37 b Fm(lo)s(cal)498 2655 +y Fl(T)n(yp)r(e:)g Fm(required)498 2755 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 +b(.)291 3063 y Fm(decomp)p 618 3063 29 4 v 33 w(data)42 +b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n(unications.)498 +3162 y(Scop)r(e:)37 b Fm(lo)s(cal)498 3262 y Fl(T)n(yp)r(e:)g +Fm(required)498 3362 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)291 3569 y Fm(act)42 b Fl(sp)r(eci\014es)28 +b(action)f(to)g(b)r(e)h(tak)n(en)f(in)h(case)f(of)g(range)f(errors.)35 +b(Scop)r(e:)i Fm(global)498 3669 y Fl(T)n(yp)r(e:)g Fm(optional)498 +3769 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(c)n(haracter)e(v)-5 +b(ariable)27 b Fj(E)p Fl(,)g Fj(W)g Fl(or)g Fj(A)p Fl(.)291 +3977 y Fm(On)k(Return)291 4184 y(x)41 b Fl(If)30 b Fh(y)i +Fl(is)d(not)g(presen)n(t,)f(then)i Fh(x)f Fl(is)g(o)n(v)n(erwritten)f +(with)h(the)h(translated)e(in)n(teger)g(indices.)498 +4284 y(Scop)r(e:)37 b Fm(global)498 4384 y Fl(T)n(yp)r(e:)g +Fm(required)498 4483 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 b(.)291 +4691 y Fm(y)41 b Fl(If)30 b Fh(y)i Fl(is)d(not)h(presen)n(t,)f(then)h +Fh(y)i Fl(is)d(o)n(v)n(erwritten)f(with)h(the)h(translated)e(in)n +(teger)h(indices,)498 4791 y(and)f Fh(x)g Fl(is)f(left)h(unc)n(hanged.) +37 b(Scop)r(e:)f Fm(global)498 4890 y Fl(T)n(yp)r(e:)h +Fm(optional)498 4990 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 b(.)1681 +5255 y(71)p eop +%%Page: 72 72 +72 71 bop 739 523 a Fe(Usage)46 b(Notes)840 705 y Fl(1.)41 +b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)f(b)r(efore)g +(calls)g(to)g(this)h(subroutine.)840 871 y(2.)41 b(The)26 +b(subroutine)g(will)g(con)n(v)n(ert)e(the)i(in)n(teger)f(v)-5 +b(alues)25 b(con)n(tained)g(in)h(the)h(input)f(v)n(ec-)946 +971 y(tor,)i(mapping)f(from)g(the)h(global)f(n)n(um)n(b)r(ering)g(to)g +(lo)r(cal)g(n)n(um)n(b)r(ering.)840 1137 y(3.)41 b(The)c(v)-5 +b(alue)36 b(of)g Fj(ACT)f Fl(will)h(determine)g(whether)g(an)n(y)g +(range)f(errors)e(in)k(the)f(input)946 1236 y(v)n(ector)23 +b(will)h(b)r(e)h(treated)e(as)h(fatal)g(errors,)e(w)n(arnings)h(or)g +(will)h(b)r(e)g(ignored.)35 b(P)n(ossible)946 1336 y(errors)26 +b(are:)1060 1502 y(-)41 b(V)-7 b(alue)18 b Fh(x)p Fl(\()p +Fh(i)p Fl(\))i(is)e(outside)g(the)h(global)e(n)n(um)n(b)r(ering)h +(range)f(1)d Fh(:)g(:)g(:)f(decomp)p 3295 1502 25 4 v +29 w(data)p Fl(\045)p Fh(matr)r(ix)p 3816 1502 V 31 w(data)p +Fl(\()p Fh(M)p 4130 1502 V 39 w Fl(\))1129 1602 y(for)27 +b(some)g Fh(i)p Fl(;)1060 1734 y(-)41 b(V)-7 b(alue)18 +b Fh(x)p Fl(\()p Fh(i)p Fl(\))i(is)e(within)h(the)g(global)e(n)n(um)n +(b)r(ering)h(range)f(1)d Fh(:)g(:)g(:)f(decomp)p 3267 +1734 V 29 w(data)p Fl(\045)p Fh(matr)r(ix)p 3788 1734 +V 31 w(data)p Fl(\()p Fh(M)p 4102 1734 V 39 w Fl(\),)1129 +1834 y(but)28 b(there)g(is)f(no)g(corresp)r(onding)f(lo)r(cal)h(index)h +(to)f(b)r(e)h(mapp)r(ed)g(on)n(to;)840 2000 y(4.)41 b(A)28 +b(global)f(index)h(will)f(ha)n(v)n(e)g(a)g(corresp)r(onding)f(lo)r(cal) +h(index)g(if)h(either:)982 2166 y(\(a\))41 b(The)23 b(global)g(index)g +(is)g(assigned)f(to)i(the)f(curren)n(t)g(pro)r(cess)f(b)n(y)h(the)h +Fj(PARTS)d Fl(user)1129 2266 y(de\014ned)28 b(routine;)977 +2399 y(\(b\))42 b(The)37 b(global)f(index)h(is)f(in)h(the)h(halo,)g +(i.e.)65 b(there)36 b(is)h(at)g(least)f(one)h(nonzero)1129 +2498 y(en)n(try)21 b(in)g(the)h(lo)r(cal)e(part)h(of)g(the)h(matrix)e +Fh(a)i Fl(corresp)r(onding)d(to)i(the)g(descriptor)1129 +2598 y Fh(decomp)p 1407 2598 V 30 w(data)27 b Fl(with)h(the)g(same)f +(column)h(index.)2130 5255 y(72)p eop +%%Page: 73 73 +73 72 bop 291 781 a Fe(PSLOC)p 745 781 41 4 v 48 w(TO)p +988 781 V 48 w(GLOB|Con)l(v)l(ert)45 b(lo)t(cal)h(to)f(global)291 +930 y(n)l(um)l(b)t(ering)291 1219 y Fl(This)26 b(subroutine)f(accepts)h +(an)f(in)n(teger)g(v)n(ector)g(of)h(arbitrary)e(size)i(con)n(taining)f +(indices)h(in)291 1318 y(the)i(lo)r(cal)f(n)n(um)n(b)r(ering)g(sc)n +(heme,)g(and)g(con)n(v)n(erts)f(them)i(in)n(to)g(global)e(n)n(um)n(b)r +(ering)h(sc)n(heme.)291 1649 y Fe(Syn)l(tax)875 1839 +y Fl(CALL)g(PSLOC)p 1413 1839 25 4 v 29 w(TO)p 1567 1839 +V 30 w(GLOB)g(\()p Ff(x,de)l(c)l(omp)p 2223 1839 26 4 +v 32 w(data,act)p Fl(\))842 2039 y(CALL)h(PSLOC)p 1381 +2039 25 4 v 29 w(TO)p 1535 2039 V 29 w(GLOB)f(\()p Ff(x,y,de)l(c)l(omp) +p 2255 2039 26 4 v 33 w(data,act)p Fl(\))291 2248 y Fm(On)k(En)m(try) +291 2456 y(x)41 b Fl(An)29 b(in)n(teger)d(v)n(ector)h(of)g(indices)h +(to)f(b)r(e)h(con)n(v)n(erted.)498 2556 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 2655 y Fl(T)n(yp)r(e:)g Fm(required)498 +2755 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)f(one)i(in)n(teger)e +(arra)n(y)-7 b(.)291 3063 y Fm(decomp)p 618 3063 29 4 +v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)498 3162 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3262 y Fl(T)n(yp)r(e:)g Fm(required)498 3362 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 3569 y Fm(act)42 b Fl(sp)r(eci\014es)28 +b(action)f(to)g(b)r(e)h(tak)n(en)f(in)h(case)f(of)g(range)f(errors.)35 +b(Scop)r(e:)i Fm(global)498 3669 y Fl(T)n(yp)r(e:)g Fm(optional)498 +3769 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(c)n(haracter)e(v)-5 +b(ariable)27 b Fj(E)p Fl(,)g Fj(W)g Fl(or)g Fj(A)p Fl(.)291 +3977 y Fm(On)k(Return)291 4184 y(x)41 b Fl(If)30 b Fh(y)i +Fl(is)d(not)g(presen)n(t,)f(then)i Fh(x)f Fl(is)g(o)n(v)n(erwritten)f +(with)h(the)h(translated)e(in)n(teger)g(indices.)498 +4284 y(Scop)r(e:)37 b Fm(global)498 4384 y Fl(T)n(yp)r(e:)g +Fm(required)498 4483 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 b(.)291 +4691 y Fm(y)41 b Fl(If)30 b Fh(y)i Fl(is)d(not)h(presen)n(t,)f(then)h +Fh(y)i Fl(is)d(o)n(v)n(erwritten)f(with)h(the)h(translated)e(in)n +(teger)h(indices,)498 4791 y(and)f Fh(x)g Fl(is)f(left)h(unc)n(hanged.) +37 b(Scop)r(e:)f Fm(global)498 4890 y Fl(T)n(yp)r(e:)h +Fm(optional)498 4990 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(in)n(teger)e(arra)n(y)-7 b(.)1681 +5255 y(73)p eop +%%Page: 74 74 +74 73 bop 739 523 a Fe(Usage)46 b(Notes)840 705 y Fl(1.)41 +b(The)28 b(descriptors)e(m)n(ust)i(b)r(e)g(assem)n(bled)f(b)r(efore)g +(calls)g(to)g(this)h(subroutine.)840 871 y(2.)41 b(The)26 +b(subroutine)g(will)g(con)n(v)n(ert)e(the)i(in)n(teger)f(v)-5 +b(alues)25 b(con)n(tained)g(in)h(the)h(input)f(v)n(ec-)946 +971 y(tor,)i(mapping)f(from)g(the)h(lo)r(cal)f(n)n(um)n(b)r(ering)g(to) +h(global)e(n)n(um)n(b)r(ering.)840 1137 y(3.)41 b(The)c(v)-5 +b(alue)36 b(of)g Fj(ACT)f Fl(will)h(determine)g(whether)g(an)n(y)g +(range)f(errors)e(in)k(the)f(input)946 1236 y(v)n(ector)23 +b(will)h(b)r(e)h(treated)e(as)h(fatal)g(errors,)e(w)n(arnings)h(or)g +(will)h(b)r(e)g(ignored.)35 b(P)n(ossible)946 1336 y(errors)26 +b(are:)1060 1502 y(-)41 b(V)-7 b(alue)18 b Fh(x)p Fl(\()p +Fh(i)p Fl(\))i(is)e(outside)g(the)h(lo)r(cal)f(n)n(um)n(b)r(ering)f +(range)g(1)d Fh(:)g(:)g(:)f(decomp)p 3246 1502 25 4 v +30 w(data)p Fl(\045)p Fh(matr)r(ix)p 3768 1502 V 31 w(data)p +Fl(\()p Fh(n)p 4042 1502 V 30 w(col)p 4175 1502 V 31 +w Fl(\))1129 1602 y(for)27 b(some)g Fh(i)p Fl(;)840 1768 +y(4.)41 b(A)28 b(global)f(index)h(will)f(ha)n(v)n(e)g(a)g(corresp)r +(onding)f(lo)r(cal)h(index)g(if)h(either:)982 1934 y(\(a\))41 +b(The)23 b(global)g(index)g(is)g(assigned)f(to)i(the)f(curren)n(t)g +(pro)r(cess)f(b)n(y)h(the)h Fj(PARTS)d Fl(user)1129 2033 +y(de\014ned)28 b(routine;)977 2166 y(\(b\))42 b(The)37 +b(global)f(index)h(is)f(in)h(the)h(halo,)g(i.e.)65 b(there)36 +b(is)h(at)g(least)f(one)h(nonzero)1129 2266 y(en)n(try)21 +b(in)g(the)h(lo)r(cal)e(part)h(of)g(the)h(matrix)e Fh(a)i +Fl(corresp)r(onding)d(to)i(the)g(descriptor)1129 2365 +y Fh(decomp)p 1407 2365 V 30 w(data)27 b Fl(with)h(the)g(same)f(column) +h(index.)2130 5255 y(74)p eop +%%Page: 75 75 +75 74 bop 291 1146 a Fk(Chapter)64 b(4)291 1561 y Fn(Metho)6 +b(d)79 b(Library)291 1993 y Fl(In)31 b(this)g(c)n(hapter)g(w)n(e)f(pro) +n(vide)g(routines)h(for)g(preconditioners)e(and)i(iterativ)n(e)g(metho) +r(ds.)291 2092 y(Their)c(in)n(terfaces)f(are)h(de\014ned)h(in)g(the)g +(mo)r(dule)g Fj(F90METHD)1681 5255 y Fl(75)p eop +%%Page: 76 76 +76 75 bop 739 764 a Fe(PRECONDITIONER)46 b(|Compute)739 +913 y(Preconditioner)739 1185 y Fl(This)27 b(subroutine)h(computes)f(a) +g(preconditioner)g(for)g(sparse)f(matrix)h Fh(A)p Fl(.)739 +1493 y Fe(Syn)l(tax)1003 1680 y Fl(CALL)h(PRECONDITIONER)e(\()p +Ff(ipr)l(e)l(c,a,l,u,vdiag,de)l(c)l(omp)p 2961 1680 26 +4 v 37 w(data,ierr)p Fl(\))739 1885 y Fm(On)31 b(En)m(try)739 +2076 y(iprec)41 b Fl(T)n(yp)r(e)28 b(of)g(preconditioner)e(to)h(b)r(e)h +(computed.)946 2176 y(Scop)r(e:)37 b Fm(global)946 2276 +y Fl(T)n(yp)r(e:)g Fm(required)946 2375 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable:)946 2566 +y Fm(0)42 b Fl(No)27 b(preconditioner;)946 2711 y Fm(1)42 +b Fl(Diagonal)26 b(scaling;)946 2856 y Fm(2)42 b Fl(Lo)r(cal)27 +b Fh(I)7 b(LU)i Fl(\(0\))27 b(preconditioning;)739 3047 +y Fm(a)42 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(global)g(sparse)f +(matrix)h Fh(A)p Fl(.)946 3147 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3247 y Fl(T)n(yp)r(e:)g Fm(required)946 3346 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3537 y Fm(decomp)p 1066 3537 29 4 +v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g(for)g(comm)n +(unications.)946 3637 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3736 y Fl(T)n(yp)r(e:)g Fm(required)946 3836 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4027 y Fm(On)k(Return)739 4218 y(ierr)41 +b Fl(error)26 b(co)r(de.)946 4317 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4417 y Fl(T)n(yp)r(e:)g Fm(required)946 4516 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(an)27 b(in)n(teger)g(n)n(um)n(b)r(er.)739 +4707 y Fm(l)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)h(global)e +(sparse)g(matrix)h Fh(L)h Fl(of)f(the)h Fh(I)7 b(LU)i +Fl(\(0\))27 b(factorization)f(of)i Fh(A)946 4807 y Fl(Scop)r(e:)37 +b Fm(lo)s(cal)946 4907 y Fl(T)n(yp)r(e:)g Fm(required)p +Fl(;)28 b(it)g(is)g(only)f(used)g(when)h Fh(ipr)r(ec)23 +b Fl(=)g(2.)946 5006 y(Sp)r(eci\014ed)29 b(as:)36 b(a)27 +b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)2130 5255 y(76)p eop +%%Page: 77 77 +77 76 bop 291 523 a Fm(u)41 b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)h +(global)e(sparse)g(matrix)i Fh(U)36 b Fl(of)27 b(the)h +Fh(I)7 b(LU)i Fl(\(0\))27 b(factorization)g(of)g Fh(A)498 +623 y Fl(Scop)r(e:)37 b Fm(lo)s(cal)498 722 y Fl(T)n(yp)r(e:)g +Fm(required)p Fl(;)28 b(it)g(is)f(only)g(used)h(when)g +Fh(ipr)r(ec)23 b Fl(=)f(2.)498 822 y(Sp)r(eci\014ed)41 +b(as:)62 b(a)40 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)f(in) +h Fg(x)f Fl(1.)75 b(Sp)r(eci\014ed)41 b(as:)62 b(a)498 +922 y(structured)27 b(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)g Fl(1.)291 1088 y Fm(vdiag)41 b Fl(a)27 b(diagonal)f(scaling)h(v) +n(ector.)498 1187 y(Scop)r(e:)37 b Fm(lo)s(cal)498 1287 +y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(used)h(when)g +Fh(ipr)r(ec)22 b Fl(=)h(2)k(or)g Fh(ipr)r(ec)c Fl(=)f(1.)498 +1386 y(Sp)r(eci\014ed)g(as:)32 b(a)21 b(rank)e(one)i(arra)n(y)d(with)j +(the)h(POINTER)d(attribute;)k(if)f Fh(ipr)r(ec)h Fl(=)f(1)e(it)498 +1486 y(con)n(tains)26 b(the)h(diagonal)f(scaling)g(matrix;)g(if)i +Fh(ipr)r(ec)23 b Fl(=)f(2)27 b(it)g(con)n(tains)f(the)h(diagonal)498 +1586 y(of)h(the)g Fh(I)7 b(LU)i Fl(\(0\))27 b(factorization.)291 +1860 y Fe(Usage)46 b(Notes)392 2042 y Fl(1.)41 b(This)31 +b(routine)f(allo)r(cates)g(the)h(in)n(ternal)g(comp)r(onen)n(ts)f(of)h +Fh(L)f Fl(and)h Fh(U)39 b Fl(as)31 b(necessary;)498 2142 +y(b)r(et)n(w)n(een)h(t)n(w)n(o)f(successiv)n(e)f(calls)h(to)g(this)h +(routine,)h(the)f(user)f(should)g(place)g(a)g(call)498 +2241 y(to)d Fj(F90_PSSPFREE)22 b Fl(for)27 b(b)r(oth)h +Fh(L)g Fl(and)f Fh(U)36 b Fl(to)28 b(a)n(v)n(oid)e(memory)h(leaks.)1681 +5255 y(77)p eop +%%Page: 78 78 +78 77 bop 739 750 a Fe(F90)p 966 750 41 4 v 48 w(CGS)45 +b(|CGS)g(Iterativ)l(e)i(Metho)t(d)739 1008 y Fl(This)36 +b(subroutine)g(implemen)n(ts)h(the)g(CGS)g(metho)r(d)g(with)g +(restarting.)62 b(The)37 b(stopping)739 1108 y(criterion)24 +b(is)i(the)g(norm)n(wise)e(bac)n(kw)n(ard)g(error,)g(in)i(the)g +(in\014nit)n(y)g(norm,)g(i.e.)36 b(the)26 b(iteration)739 +1208 y(is)h(stopp)r(ed)h(when)1995 1302 y Fg(k)p Fh(r)r +Fg(k)p 1776 1340 561 4 v 1776 1416 a Fl(\()p Fg(k)p Fh(A)p +Fg(kk)p Fh(x)p Fg(k)17 b Fl(+)h Fg(k)p Fh(b)p Fg(k)p +Fl(\))2369 1359 y Fh(<)23 b(eps)739 1666 y Fe(Syn)l(tax)1174 +1850 y Fl(CALL)28 b(F90)p 1571 1850 25 4 v 29 w(CGS)g(\()p +Ff(a,ipr)l(e)l(c,l,u,vdiag,b,x,ep)q(s,de)m(c)l(omp)p +2949 1850 26 4 v 37 w(data)p Fl(\))739 2050 y(CALL)g(F90)p +1136 2050 25 4 v 29 w(CGS)1304 2149 y(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,ep)q(s,)36 b(de)l(c)l(omp)p 2494 2149 +26 4 v 31 w(data,itmax,iter,err,ierr,itr)l(ac)l(e)p Fl(\))739 +2351 y Fm(On)31 b(En)m(try)739 2528 y(iprec)41 b Fl(T)n(yp)r(e)28 +b(of)g(preconditioner)e(to)h(b)r(e)h(computed.)946 2628 +y(Scop)r(e:)37 b Fm(global)946 2727 y Fl(T)n(yp)r(e:)g +Fm(required)946 2827 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(an)27 b(in)n(teger)g(v)-5 b(ariable:)946 3004 y Fm(0)42 +b Fl(No)27 b(preconditioner;)946 3142 y Fm(1)42 b Fl(Diagonal)26 +b(scaling;)946 3280 y Fm(2)42 b Fl(Lo)r(cal)27 b Fh(I)7 +b(LU)i Fl(\(0\))27 b(preconditioning;)739 3457 y Fm(a)42 +b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)g(global)g(sparse)f(matrix)h +Fh(A)p Fl(.)946 3557 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +3656 y Fl(T)n(yp)r(e:)g Fm(required)946 3756 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 3933 y Fm(l)41 b Fl(the)30 b(lo)r(cal)e(p)r(ortion)h +(of)g(global)g(sparse)e(matrix)i Fh(L)g Fl(of)g(the)h +Fh(I)7 b(LU)i Fl(\(0\))29 b(factorization)f(of)h Fh(A)p +Fl(,)946 4032 y(as)e(returned)h(b)n(y)f(the)h Fj(PRECONDITIONER)22 +b Fl(routine.)946 4132 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4232 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)g(only)f(used)g +(when)h Fh(ipr)r(ec)23 b Fl(=)g(2.)946 4331 y(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 4508 y Fm(u)41 b Fl(the)27 b(lo)r(cal)f(p)r(ortion)g +(of)h(global)e(sparse)g(matrix)h Fh(U)36 b Fl(of)26 b(the)h +Fh(I)7 b(LU)i Fl(\(0\))26 b(factorization)f(of)i Fh(A)p +Fl(,)946 4608 y(as)g(returned)h(b)n(y)f(the)h Fj(PRECONDITIONER)22 +b Fl(routine.)946 4707 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +4807 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)g(only)f(used)g +(when)h Fh(ipr)r(ec)23 b Fl(=)g(2.)946 4907 y(Sp)r(eci\014ed)42 +b(as:)61 b(a)40 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +f Fg(x)h Fl(1.)75 b(Sp)r(eci\014ed)41 b(as:)62 b(a)946 +5006 y(structured)28 b(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)f(in)h +Fg(x)g Fl(1.)2130 5255 y(78)p eop +%%Page: 79 79 +79 78 bop 291 523 a Fm(vdiag)41 b Fl(a)27 b(diagonal)f(scaling)h(v)n +(ector.)498 623 y(Scop)r(e:)37 b Fm(lo)s(cal)498 722 +y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(used)h(when)g +Fh(ipr)r(ec)22 b Fl(=)h(2)k(or)g Fh(ipr)r(ec)c Fl(=)f(1.)498 +822 y(Sp)r(eci\014ed)29 b(as:)38 b(a)28 b(rank)f(one)h(arra)n(y)e(,)j +(as)f(returned)g(b)n(y)g(the)g Fj(PRECONDITIONER)23 b +Fl(rou-)498 922 y(tine.)291 1088 y Fm(b)41 b Fl(The)28 +b(RHS)g(v)n(ector.)498 1187 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +1287 y Fl(T)n(yp)r(e:)g Fm(required)498 1386 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(rank)f(one)i(arra)n(y)-7 b(.)291 1553 +y Fm(x)41 b Fl(The)28 b(initial)g(guess.)498 1652 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 1752 y Fl(T)n(yp)r(e:)g Fm(required)498 +1851 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)f(one)i(arra)n(y)-7 +b(.)291 2017 y Fm(eps)41 b Fl(The)27 b(stopping)g(tolerance.)498 +2117 y(Scop)r(e:)37 b Fm(global)498 2217 y Fl(T)n(yp)r(e:)g +Fm(required)498 2316 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(real)f(n)n(um)n(b)r(er.)291 2482 y Fm(decomp)p +618 2482 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)498 2582 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +2682 y Fl(T)n(yp)r(e:)g Fm(required)498 2781 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 2947 y Fm(itmax)40 b Fl(The)28 b(maxim)n(um)f(n)n +(um)n(b)r(er)h(of)f(iterations)g(to)g(p)r(erform.)498 +3047 y(Scop)r(e:)37 b Fm(global)498 3147 y Fl(T)n(yp)r(e:)g +Fm(optional)498 3246 y Fl(Default:)h Fh(itmax)23 b Fl(=)f(1000.)498 +3346 y(Sp)r(eci\014ed)28 b(as:)37 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b Fh(itmax)d Fg(\025)g Fl(1.)291 3512 y +Fm(itrace)42 b Fl(A)28 b(tracing)e(parameter.)498 3611 +y(Scop)r(e:)37 b Fm(global)498 3711 y Fl(T)n(yp)r(e:)g +Fm(optional)498 3811 y Fl(Default:)h Fh(itr)r(ace)23 +b Fl(=)f Fg(\000)p Fl(1.)498 3910 y(Sp)r(eci\014ed)33 +b(as:)45 b(an)32 b(in)n(teger)g(v)-5 b(ariable)31 b Fg(\000)p +Fl(1)f Fg(\024)g Fh(itr)r(ace)g Fg(\024)h Fl(99;)i(if)g +Fh(itr)r(ace)e(>)f Fg(\000)p Fl(1,)i(it)h(is)498 4010 +y(in)n(terpreted)25 b(as)f(a)g(F)n(OR)-7 b(TRAN)25 b(unit)h(n)n(um)n(b) +r(er,)f(and)g(is)g(used)g(to)f(prin)n(t)h(a)g(summary)498 +4110 y(of)j(the)g(con)n(v)n(ergence)d(parameters)h(at)h(eac)n(h)g +(iteration.)291 4276 y Fm(On)k(Return)291 4442 y(x)41 +b Fl(The)28 b(computed)g(solution.)498 4541 y(Scop)r(e:)37 +b Fm(lo)s(cal)498 4641 y Fl(T)n(yp)r(e:)g Fm(required)498 +4741 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)f(one)i(arra)n(y)-7 +b(.)291 4907 y Fm(iter)41 b Fl(The)27 b(n)n(um)n(b)r(er)h(of)f +(iterations)g(p)r(erformed.)498 5006 y(Scop)r(e:)37 b +Fm(global)1681 5255 y Fl(79)p eop +%%Page: 80 80 +80 79 bop 946 523 a Fl(T)n(yp)r(e:)37 b Fm(optional)946 +623 y Fl(Returned)28 b(as:)36 b(an)28 b(in)n(teger)e(v)-5 +b(ariable.)739 789 y Fm(err)42 b Fl(The)27 b(error)f(estimate)i(on)f +(exit.)946 888 y(Scop)r(e:)37 b Fm(global)946 988 y Fl(T)n(yp)r(e:)g +Fm(optional)946 1088 y Fl(Returned)28 b(as:)36 b(a)28 +b(real)e(n)n(um)n(b)r(er.)739 1254 y Fm(ierr)41 b Fl(An)28 +b(error)e(co)r(de.)946 1353 y(Scop)r(e:)37 b Fm(global)946 +1453 y Fl(T)n(yp)r(e:)g Fm(optional)946 1553 y Fl(Returned)28 +b(as:)36 b(an)28 b(in)n(teger)e(v)-5 b(ariable:)946 1719 +y Fg(\000)p Fl(1)41 b(In)n(v)-5 b(alid)27 b(preconditioner;)946 +1851 y(0)41 b(Normal)27 b(termination;)946 1984 y Fh(>)c +Fl(0)41 b(Metho)r(d)27 b(did)g(not)g(con)n(v)n(erge;)e(set)h(to)h(the)g +(n)n(um)n(b)r(er)f(of)h(iterations)f(p)r(erformed.)2130 +5255 y(80)p eop +%%Page: 81 81 +81 80 bop 291 750 a Fe(F90)p 518 750 41 4 v 48 w(BICG)44 +b(|BiCG)i(Iterativ)l(e)h(Metho)t(d)291 1008 y Fl(This)33 +b(subroutine)g(implemen)n(ts)g(the)h(BiCG)f(metho)r(d)h(with)g +(restarting.)52 b(The)33 b(stopping)291 1108 y(criterion)24 +b(is)i(the)g(norm)n(wise)e(bac)n(kw)n(ard)g(error,)g(in)i(the)g +(in\014nit)n(y)g(norm,)f(i.e.)37 b(the)26 b(iteration)291 +1208 y(is)h(stopp)r(ed)h(when)1546 1302 y Fg(k)p Fh(r)r +Fg(k)p 1327 1340 561 4 v 1327 1416 a Fl(\()p Fg(k)p Fh(A)p +Fg(kk)p Fh(x)p Fg(k)18 b Fl(+)g Fg(k)p Fh(b)p Fg(k)p +Fl(\))1921 1359 y Fh(<)23 b(eps)291 1666 y Fe(Syn)l(tax)705 +1850 y Fl(CALL)k(F90)p 1101 1850 25 4 v 29 w(BICG)h(\()p +Ff(a,ipr)l(e)l(c,l,u,vdiag,b,x,ep)q(s,de)m(c)l(omp)p +2522 1850 26 4 v 37 w(data)p Fl(\))291 2050 y(CALL)f(F90)p +687 2050 25 4 v 29 w(BICG)856 2149 y(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,eps,)36 b(de)l(c)l(omp)p 2045 2149 26 +4 v 32 w(data,itmax,iter,err,ierr,itr)l(ac)l(e)p Fl(\))291 +2351 y Fm(On)31 b(En)m(try)291 2528 y(iprec)41 b Fl(T)n(yp)r(e)28 +b(of)f(preconditioner)g(to)g(b)r(e)h(computed.)498 2628 +y(Scop)r(e:)37 b Fm(global)498 2727 y Fl(T)n(yp)r(e:)g +Fm(required)498 2827 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(an)27 b(in)n(teger)g(v)-5 b(ariable:)498 3004 y Fm(0)41 +b Fl(No)28 b(preconditioner;)498 3142 y Fm(1)41 b Fl(Diagonal)27 +b(scaling;)498 3280 y Fm(2)41 b Fl(Lo)r(cal)27 b Fh(I)7 +b(LU)i Fl(\(0\))27 b(preconditioning;)291 3457 y Fm(a)41 +b Fl(the)28 b(lo)r(cal)g(p)r(ortion)f(of)g(global)g(sparse)f(matrix)h +Fh(A)p Fl(.)498 3557 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +3656 y Fl(T)n(yp)r(e:)g Fm(required)498 3756 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 3933 y Fm(l)41 b Fl(the)29 b(lo)r(cal)g(p)r(ortion)g +(of)g(global)f(sparse)g(matrix)h Fh(L)g Fl(of)g(the)h +Fh(I)7 b(LU)i Fl(\(0\))28 b(factorization)g(of)i Fh(A)p +Fl(,)498 4032 y(as)d(returned)g(b)n(y)h(the)g Fj(PRECONDITIONER)21 +b Fl(routine.)498 4132 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4232 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(only)g(used)h +(when)g Fh(ipr)r(ec)23 b Fl(=)f(2.)498 4331 y(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)291 4508 y Fm(u)41 b Fl(the)27 b(lo)r(cal)f(p)r(ortion)g +(of)h(global)e(sparse)g(matrix)h Fh(U)35 b Fl(of)27 b(the)g +Fh(I)7 b(LU)i Fl(\(0\))26 b(factorization)f(of)i Fh(A)p +Fl(,)498 4608 y(as)g(returned)g(b)n(y)h(the)g Fj(PRECONDITIONER)21 +b Fl(routine.)498 4707 y(Scop)r(e:)37 b Fm(lo)s(cal)498 +4807 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(only)g(used)h +(when)g Fh(ipr)r(ec)23 b Fl(=)f(2.)498 4907 y(Sp)r(eci\014ed)41 +b(as:)62 b(a)40 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)f(in) +h Fg(x)f Fl(1.)75 b(Sp)r(eci\014ed)41 b(as:)62 b(a)498 +5006 y(structured)27 b(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)g Fl(1.)1681 5255 y(81)p eop +%%Page: 82 82 +82 81 bop 739 523 a Fm(vdiag)42 b Fl(a)27 b(diagonal)f(scaling)g(v)n +(ector.)946 623 y(Scop)r(e:)37 b Fm(lo)s(cal)946 722 +y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)g(used)f(when)h +Fh(ipr)r(ec)23 b Fl(=)f(2)28 b(or)e Fh(ipr)r(ec)d Fl(=)g(1.)946 +822 y(Sp)r(eci\014ed)29 b(as:)38 b(a)28 b(rank)g(one)g(arra)n(y)e(,)i +(as)g(returned)g(b)n(y)g(the)h Fj(PRECONDITIONER)22 b +Fl(rou-)946 922 y(tine.)739 1088 y Fm(b)41 b Fl(The)28 +b(RHS)g(v)n(ector.)946 1187 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +1287 y Fl(T)n(yp)r(e:)g Fm(required)946 1386 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(rank)g(one)g(arra)n(y)-7 b(.)739 1553 +y Fm(x)42 b Fl(The)27 b(initial)h(guess.)946 1652 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 1752 y Fl(T)n(yp)r(e:)g Fm(required)946 +1851 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(arra)n(y)-7 +b(.)739 2017 y Fm(eps)41 b Fl(The)28 b(stopping)f(tolerance.)946 +2117 y(Scop)r(e:)37 b Fm(global)946 2217 y Fl(T)n(yp)r(e:)g +Fm(required)946 2316 y Fl(Sp)r(eci\014ed)29 b(as:)36 +b(a)27 b(real)g(n)n(um)n(b)r(er.)739 2482 y Fm(decomp)p +1066 2482 29 4 v 33 w(data)42 b Fl(con)n(tains)27 b(data)g(structures)g +(for)g(comm)n(unications.)946 2582 y(Scop)r(e:)37 b Fm(lo)s(cal)946 +2682 y Fl(T)n(yp)r(e:)g Fm(required)946 2781 y Fl(Sp)r(eci\014ed)29 +b(as:)36 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)739 2947 y Fm(itmax)40 b Fl(The)28 b(maxim)n(um)g(n)n +(um)n(b)r(er)f(of)g(iterations)g(to)h(p)r(erform.)946 +3047 y(Scop)r(e:)37 b Fm(global)946 3147 y Fl(T)n(yp)r(e:)g +Fm(optional)946 3246 y Fl(Default:)h Fh(itmax)23 b Fl(=)g(1000.)946 +3346 y(Sp)r(eci\014ed)29 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b Fh(itmax)d Fg(\025)g Fl(1.)739 3512 y +Fm(itrace)42 b Fl(A)28 b(tracing)f(parameter.)946 3611 +y(Scop)r(e:)37 b Fm(global)946 3711 y Fl(T)n(yp)r(e:)g +Fm(optional)946 3811 y Fl(Default:)h Fh(itr)r(ace)23 +b Fl(=)g Fg(\000)p Fl(1.)946 3910 y(Sp)r(eci\014ed)33 +b(as:)46 b(an)32 b(in)n(teger)f(v)-5 b(ariable)31 b Fg(\000)p +Fl(1)f Fg(\024)g Fh(itr)r(ace)h Fg(\024)f Fl(99;)k(if)e +Fh(itr)r(ace)f(>)f Fg(\000)p Fl(1,)j(it)f(is)946 4010 +y(in)n(terpreted)25 b(as)f(a)h(F)n(OR)-7 b(TRAN)25 b(unit)g(n)n(um)n(b) +r(er,)h(and)e(is)h(used)g(to)g(prin)n(t)g(a)f(summary)946 +4110 y(of)k(the)g(con)n(v)n(ergence)d(parameters)h(at)h(eac)n(h)g +(iteration.)739 4276 y Fm(On)k(Return)739 4442 y(x)42 +b Fl(The)27 b(computed)h(solution.)946 4541 y(Scop)r(e:)37 +b Fm(lo)s(cal)946 4641 y Fl(T)n(yp)r(e:)g Fm(required)946 +4741 y Fl(Sp)r(eci\014ed)29 b(as:)36 b(a)27 b(rank)g(one)g(arra)n(y)-7 +b(.)739 4907 y Fm(iter)41 b Fl(The)28 b(n)n(um)n(b)r(er)f(of)h +(iterations)e(p)r(erformed.)946 5006 y(Scop)r(e:)37 b +Fm(global)2130 5255 y Fl(82)p eop +%%Page: 83 83 +83 82 bop 498 523 a Fl(T)n(yp)r(e:)37 b Fm(optional)498 +623 y Fl(Returned)28 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable.)291 789 y Fm(err)41 b Fl(The)28 b(error)e(estimate)h(on)h +(exit.)498 888 y(Scop)r(e:)37 b Fm(global)498 988 y Fl(T)n(yp)r(e:)g +Fm(optional)498 1088 y Fl(Returned)28 b(as:)36 b(a)27 +b(real)g(n)n(um)n(b)r(er.)291 1254 y Fm(ierr)41 b Fl(An)28 +b(error)e(co)r(de.)498 1353 y(Scop)r(e:)37 b Fm(global)498 +1453 y Fl(T)n(yp)r(e:)g Fm(optional)498 1553 y Fl(Returned)28 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable:)498 1719 +y Fg(\000)p Fl(1)41 b(In)n(v)-5 b(alid)27 b(preconditioner;)498 +1851 y(0)41 b(Normal)27 b(termination;)498 1984 y Fh(>)c +Fl(0)41 b(Metho)r(d)27 b(did)g(not)g(con)n(v)n(erge;)d(set)j(to)f(the)i +(n)n(um)n(b)r(er)e(of)h(iterations)e(p)r(erformed.)1681 +5255 y(83)p eop +%%Page: 84 84 +84 83 bop 946 766 a Fe(F90)p 1173 766 41 4 v 49 w(BICGST)-11 +b(AB)43 b(|BiCGST)-11 b(AB)43 b(Iterativ)l(e)946 916 +y(Metho)t(d)946 1191 y Fl(This)24 b(subroutine)f(implemen)n(ts)h(the)h +(BiCGST)-7 b(AB)24 b(metho)r(d)g(with)g(restarting.)34 +b(The)946 1291 y(stopping)f(criterion)f(is)h(the)h(norm)n(wise)e(bac)n +(kw)n(ard)f(error,)h(in)i(the)f(in\014nit)n(y)h(norm,)946 +1390 y(i.e.)j(the)28 b(iteration)f(is)h(stopp)r(ed)g(when)2098 +1554 y Fg(k)p Fh(r)r Fg(k)p 1880 1591 561 4 v 1880 1667 +a Fl(\()p Fg(k)p Fh(A)p Fg(kk)p Fh(x)p Fg(k)17 b Fl(+)h +Fg(k)p Fh(b)p Fg(k)p Fl(\))2473 1610 y Fh(<)23 b(eps)946 +1934 y Fe(Syn)l(tax)1043 2116 y Fl(CALL)28 b(F90)p 1440 +2116 25 4 v 29 w(BICGST)-7 b(AB)27 b(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,e)q(ps,d)q(e)l(c)l(omp)p 3081 2116 26 +4 v 37 w(data)p Fl(\))739 2315 y(CALL)h(F90)p 1136 2315 +25 4 v 29 w(BICGST)-7 b(AB)1304 2415 y(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,ep)q(s,)36 b(de)l(c)l(omp)p 2494 2415 +26 4 v 31 w(data,itmax,iter,err,ierr,itr)l(ac)l(e)p Fl(\))946 +2630 y Fm(On)c(En)m(try)946 2760 y(iprec)42 b Fl(T)n(yp)r(e)28 +b(of)f(preconditioner)f(to)i(b)r(e)g(computed.)1129 2860 +y(Scop)r(e:)37 b Fm(global)1129 2959 y Fl(T)n(yp)r(e:)g +Fm(required)1129 3059 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(an)27 b(in)n(teger)g(v)-5 b(ariable:)1129 3189 y Fm(0)41 +b Fl(No)28 b(preconditioner;)1129 3304 y Fm(1)41 b Fl(Diagonal)27 +b(scaling;)1129 3419 y Fm(2)41 b Fl(Lo)r(cal)27 b Fh(I)7 +b(LU)i Fl(\(0\))27 b(preconditioning;)946 3550 y Fm(a)42 +b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)h(global)f(sparse)f(matrix)h +Fh(A)p Fl(.)1129 3649 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 +3749 y Fl(T)n(yp)r(e:)g Fm(required)1129 3849 y Fl(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)946 3979 y Fm(l)41 b Fl(the)30 b(lo)r(cal)f(p)r(ortion)h +(of)f(global)g(sparse)f(matrix)h Fh(L)g Fl(of)h(the)g +Fh(I)7 b(LU)i Fl(\(0\))29 b(factorization)1129 4079 y(of)f +Fh(A)p Fl(,)g(as)f(returned)g(b)n(y)g(the)h Fj(PRECONDITIONER)22 +b Fl(routine.)1129 4178 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 +4278 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(only)g(used)h +(when)g Fh(ipr)r(ec)23 b Fl(=)f(2.)1129 4378 y(Sp)r(eci\014ed)28 +b(as:)37 b(a)27 b(structured)g(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in) +g Fg(x)f Fl(1.)946 4508 y Fm(u)42 b Fl(the)27 b(lo)r(cal)f(p)r(ortion)f +(of)i(global)e(sparse)g(matrix)h Fh(U)35 b Fl(of)26 b(the)h +Fh(I)7 b(LU)i Fl(\(0\))26 b(factorization)1129 4608 y(of)i +Fh(A)p Fl(,)g(as)f(returned)g(b)n(y)g(the)h Fj(PRECONDITIONER)22 +b Fl(routine.)1129 4707 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 +4807 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(only)g(used)h +(when)g Fh(ipr)r(ec)23 b Fl(=)f(2.)1129 4907 y(Sp)r(eci\014ed)30 +b(as:)39 b(a)28 b(structured)h(data)g(t)n(yp)r(e)g(sp)r(eci\014ed)g(in) +g Fg(x)g Fl(1.)41 b(Sp)r(eci\014ed)30 b(as:)39 b(a)1129 +5006 y(structured)27 b(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)2130 5255 y(84)p eop +%%Page: 85 85 +85 84 bop 498 523 a Fm(vdiag)42 b Fl(a)27 b(diagonal)f(scaling)h(v)n +(ector.)681 623 y(Scop)r(e:)37 b Fm(lo)s(cal)681 722 +y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)27 b(it)h(is)g(used)g(when)f +Fh(ipr)r(ec)c Fl(=)g(2)k(or)g Fh(ipr)r(ec)22 b Fl(=)h(1.)681 +822 y(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)g(one)h(arra)n(y)d(,)j +(as)f(returned)h(b)n(y)f(the)i Fj(PRECONDITIONER)681 +922 y Fl(routine.)498 1065 y Fm(b)42 b Fl(The)27 b(RHS)i(v)n(ector.)681 +1165 y(Scop)r(e:)37 b Fm(lo)s(cal)681 1265 y Fl(T)n(yp)r(e:)g +Fm(required)681 1364 y Fl(Sp)r(eci\014ed)28 b(as:)36 +b(a)27 b(rank)g(one)g(arra)n(y)-7 b(.)498 1508 y Fm(x)42 +b Fl(The)28 b(initial)g(guess.)681 1608 y(Scop)r(e:)37 +b Fm(lo)s(cal)681 1707 y Fl(T)n(yp)r(e:)g Fm(required)681 +1807 y Fl(Sp)r(eci\014ed)28 b(as:)36 b(a)27 b(rank)g(one)g(arra)n(y)-7 +b(.)498 1951 y Fm(eps)41 b Fl(The)28 b(stopping)f(tolerance.)681 +2051 y(Scop)r(e:)37 b Fm(global)681 2150 y Fl(T)n(yp)r(e:)g +Fm(required)681 2250 y Fl(Sp)r(eci\014ed)28 b(as:)36 +b(a)27 b(real)g(n)n(um)n(b)r(er.)498 2394 y Fm(decomp)p +825 2394 29 4 v 33 w(data)43 b Fl(con)n(tains)26 b(data)h(structures)g +(for)g(comm)n(unications.)681 2493 y(Scop)r(e:)37 b Fm(lo)s(cal)681 +2593 y Fl(T)n(yp)r(e:)g Fm(required)681 2693 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(structured)h(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)f(in) +h Fg(x)g Fl(1.)498 2837 y Fm(itmax)41 b Fl(The)27 b(maxim)n(um)h(n)n +(um)n(b)r(er)f(of)h(iterations)e(to)i(p)r(erform.)681 +2936 y(Scop)r(e:)37 b Fm(global)681 3036 y Fl(T)n(yp)r(e:)g +Fm(optional)681 3135 y Fl(Default:)g Fh(itmax)23 b Fl(=)g(1000.)681 +3235 y(Sp)r(eci\014ed)28 b(as:)36 b(an)28 b(in)n(teger)e(v)-5 +b(ariable)27 b Fh(itmax)c Fg(\025)f Fl(1.)498 3379 y +Fm(itrace)42 b Fl(A)28 b(tracing)f(parameter.)681 3479 +y(Scop)r(e:)37 b Fm(global)681 3578 y Fl(T)n(yp)r(e:)g +Fm(optional)681 3678 y Fl(Default:)g Fh(itr)r(ace)23 +b Fl(=)g Fg(\000)p Fl(1.)681 3778 y(Sp)r(eci\014ed)32 +b(as:)45 b(an)32 b(in)n(teger)f(v)-5 b(ariable)30 b Fg(\000)p +Fl(1)g Fg(\024)g Fh(itr)r(ace)g Fg(\024)f Fl(99;)34 b(if)e +Fh(itr)r(ace)e(>)g Fg(\000)p Fl(1,)681 3877 y(it)c(is)g(in)n(terpreted) +g(as)g(a)f(F)n(OR)-7 b(TRAN)27 b(unit)g(n)n(um)n(b)r(er,)f(and)g(is)g +(used)g(to)g(prin)n(t)g(a)681 3977 y(summary)h(of)g(the)h(con)n(v)n +(ergence)d(parameters)h(at)i(eac)n(h)e(iteration.)498 +4121 y Fm(On)32 b(Return)498 4265 y(x)42 b Fl(The)28 +b(computed)g(solution.)681 4364 y(Scop)r(e:)37 b Fm(lo)s(cal)681 +4464 y Fl(T)n(yp)r(e:)g Fm(required)681 4563 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(rank)g(one)g(arra)n(y)-7 b(.)498 4707 +y Fm(iter)41 b Fl(The)28 b(n)n(um)n(b)r(er)f(of)h(iterations)f(p)r +(erformed.)681 4807 y(Scop)r(e:)37 b Fm(global)681 4907 +y Fl(T)n(yp)r(e:)g Fm(optional)681 5006 y Fl(Returned)28 +b(as:)36 b(an)27 b(in)n(teger)g(v)-5 b(ariable.)1681 +5255 y(85)p eop +%%Page: 86 86 +86 85 bop 946 523 a Fm(err)42 b Fl(The)28 b(error)e(estimate)h(on)h +(exit.)1129 623 y(Scop)r(e:)37 b Fm(global)1129 722 y +Fl(T)n(yp)r(e:)g Fm(optional)1129 822 y Fl(Returned)28 +b(as:)36 b(a)27 b(real)g(n)n(um)n(b)r(er.)946 955 y Fm(ierr)42 +b Fl(An)28 b(error)e(co)r(de.)1129 1054 y(Scop)r(e:)37 +b Fm(global)1129 1154 y Fl(T)n(yp)r(e:)g Fm(optional)1129 +1254 y Fl(Returned)28 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable:)1129 1386 y Fg(\000)p Fl(1)41 b(In)n(v)-5 +b(alid)27 b(preconditioner;)1129 1503 y(0)41 b(Normal)27 +b(termination;)1129 1619 y Fh(>)c Fl(0)41 b(Metho)r(d)33 +b(did)g(not)g(con)n(v)n(erge;)g(set)g(to)g(the)g(n)n(um)n(b)r(er)g(of)g +(iterations)f(p)r(er-)1284 1719 y(formed.)2130 5255 y(86)p +eop +%%Page: 87 87 +87 86 bop 498 766 a Fe(F90)p 725 766 41 4 v 49 w(BICGST)-11 +b(ABL)42 b(|BiCGST)-11 b(AB\(L\))44 b(Iterativ)l(e)498 +916 y(Metho)t(d)498 1191 y Fl(This)33 b(subroutine)f(implemen)n(ts)h +(the)g(BiCGST)-7 b(AB\(L\))33 b(metho)r(d)g(with)g(restarting.)498 +1291 y(The)40 b(stopping)g(criterion)e(is)i(the)g(norm)n(wise)f(bac)n +(kw)n(ard)f(error,)j(in)f(the)h(in\014nit)n(y)498 1390 +y(norm,)27 b(i.e.)37 b(the)28 b(iteration)f(is)h(stopp)r(ed)f(when)1650 +1554 y Fg(k)p Fh(r)r Fg(k)p 1431 1591 561 4 v 1431 1667 +a Fl(\()p Fg(k)p Fh(A)p Fg(kk)p Fh(x)p Fg(k)18 b Fl(+)g +Fg(k)p Fh(b)p Fg(k)p Fl(\))2025 1610 y Fh(<)k(eps)498 +1934 y Fe(Syn)l(tax)569 2116 y Fl(CALL)27 b(F90)p 965 +2116 25 4 v 29 w(BICGST)-7 b(ABL)28 b(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,ep)q(s,de)m(c)l(omp)p 2658 2116 26 4 +v 37 w(data)p Fl(\))291 2315 y(CALL)f(F90)p 687 2315 +25 4 v 29 w(BICGST)-7 b(ABL)741 2415 y(\()p Ff(a,ipr)l(e)l +(c,l,u,vdiag,b,x,ep)q(s,)36 b(de)l(c)l(omp)p 1931 2415 +26 4 v 31 w(data,itmax,iter,err,ierr,itr)l(ac)l(e,ml)p +Fl(\))498 2630 y Fm(On)c(En)m(try)498 2760 y(iprec)42 +b Fl(T)n(yp)r(e)27 b(of)h(preconditioner)e(to)i(b)r(e)g(computed.)681 +2860 y(Scop)r(e:)37 b Fm(global)681 2959 y Fl(T)n(yp)r(e:)g +Fm(required)681 3059 y Fl(Sp)r(eci\014ed)28 b(as:)36 +b(an)28 b(in)n(teger)e(v)-5 b(ariable:)681 3189 y Fm(0)41 +b Fl(No)27 b(preconditioner;)681 3304 y Fm(1)41 b Fl(Diagonal)26 +b(scaling;)681 3419 y Fm(2)41 b Fl(Lo)r(cal)27 b Fh(I)7 +b(LU)i Fl(\(0\))27 b(preconditioning;)498 3550 y Fm(a)42 +b Fl(the)28 b(lo)r(cal)f(p)r(ortion)g(of)h(global)e(sparse)g(matrix)h +Fh(A)p Fl(.)681 3649 y(Scop)r(e:)37 b Fm(lo)s(cal)681 +3749 y Fl(T)n(yp)r(e:)g Fm(required)681 3849 y Fl(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(structured)h(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)f(in) +h Fg(x)g Fl(1.)498 3979 y Fm(l)41 b Fl(the)30 b(lo)r(cal)f(p)r(ortion)g +(of)h(global)e(sparse)h(matrix)g Fh(L)g Fl(of)g(the)h +Fh(I)7 b(LU)i Fl(\(0\))29 b(factorization)681 4079 y(of)e +Fh(A)p Fl(,)h(as)f(returned)g(b)n(y)h(the)g Fj(PRECONDITIONER)22 +b Fl(routine.)681 4178 y(Scop)r(e:)37 b Fm(lo)s(cal)681 +4278 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)27 b(it)h(is)g(only)f(used)h +(when)f Fh(ipr)r(ec)c Fl(=)g(2.)681 4378 y(Sp)r(eci\014ed)28 +b(as:)36 b(a)27 b(structured)h(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)f(in) +h Fg(x)g Fl(1.)498 4508 y Fm(u)42 b Fl(the)26 b(lo)r(cal)g(p)r(ortion)g +(of)g(global)g(sparse)f(matrix)g Fh(U)35 b Fl(of)27 b(the)g +Fh(I)7 b(LU)i Fl(\(0\))25 b(factorization)681 4608 y(of)i +Fh(A)p Fl(,)h(as)f(returned)g(b)n(y)h(the)g Fj(PRECONDITIONER)22 +b Fl(routine.)681 4707 y(Scop)r(e:)37 b Fm(lo)s(cal)681 +4807 y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)27 b(it)h(is)g(only)f(used)h +(when)f Fh(ipr)r(ec)c Fl(=)g(2.)681 4907 y(Sp)r(eci\014ed)29 +b(as:)39 b(a)29 b(structured)g(data)f(t)n(yp)r(e)h(sp)r(eci\014ed)h(in) +f Fg(x)g Fl(1.)41 b(Sp)r(eci\014ed)29 b(as:)39 b(a)681 +5006 y(structured)27 b(data)g(t)n(yp)r(e)h(sp)r(eci\014ed)g(in)g +Fg(x)f Fl(1.)1681 5255 y(87)p eop +%%Page: 88 88 +88 87 bop 946 523 a Fm(vdiag)42 b Fl(a)27 b(diagonal)f(scaling)h(v)n +(ector.)1129 623 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 722 +y Fl(T)n(yp)r(e:)g Fm(required)p Fl(;)28 b(it)g(is)f(used)h(when)g +Fh(ipr)r(ec)22 b Fl(=)h(2)k(or)g Fh(ipr)r(ec)c Fl(=)f(1.)1129 +822 y(Sp)r(eci\014ed)29 b(as:)36 b(a)28 b(rank)f(one)g(arra)n(y)f(,)i +(as)f(returned)g(b)n(y)h(the)g Fj(PRECONDITIONER)1129 +922 y Fl(routine.)946 1054 y Fm(b)42 b Fl(The)28 b(RHS)g(v)n(ector.) +1129 1154 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 1254 y Fl(T)n(yp)r(e:)g +Fm(required)1129 1353 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(arra)n(y)-7 b(.)946 1486 y Fm(x)42 +b Fl(The)28 b(initial)g(guess.)1129 1586 y(Scop)r(e:)37 +b Fm(lo)s(cal)1129 1685 y Fl(T)n(yp)r(e:)g Fm(required)1129 +1785 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(rank)f(one)i(arra)n(y)-7 +b(.)946 1918 y Fm(eps)41 b Fl(The)28 b(stopping)f(tolerance.)1129 +2017 y(Scop)r(e:)37 b Fm(global)1129 2117 y Fl(T)n(yp)r(e:)g +Fm(required)1129 2217 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(real)f(n)n(um)n(b)r(er.)946 2350 y Fm(decomp)p +1273 2350 29 4 v 33 w(data)43 b Fl(con)n(tains)27 b(data)g(structures)f +(for)i(comm)n(unications.)1129 2449 y(Scop)r(e:)37 b +Fm(lo)s(cal)1129 2549 y Fl(T)n(yp)r(e:)g Fm(required)1129 +2648 y Fl(Sp)r(eci\014ed)28 b(as:)37 b(a)27 b(structured)g(data)g(t)n +(yp)r(e)h(sp)r(eci\014ed)g(in)g Fg(x)f Fl(1.)946 2781 +y Fm(itmax)41 b Fl(The)28 b(maxim)n(um)f(n)n(um)n(b)r(er)g(of)h +(iterations)f(to)g(p)r(erform.)1129 2881 y(Scop)r(e:)37 +b Fm(global)1129 2980 y Fl(T)n(yp)r(e:)g Fm(optional)1129 +3080 y Fl(Default:)h Fh(itmax)23 b Fl(=)f(1000.)1129 +3180 y(Sp)r(eci\014ed)28 b(as:)37 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b Fh(itmax)d Fg(\025)g Fl(1.)946 3313 y +Fm(itrace)43 b Fl(A)27 b(tracing)g(parameter.)1129 3412 +y(Scop)r(e:)37 b Fm(global)1129 3512 y Fl(T)n(yp)r(e:)g +Fm(optional)1129 3611 y Fl(Default:)h Fh(itr)r(ace)23 +b Fl(=)f Fg(\000)p Fl(1.)1129 3711 y(Sp)r(eci\014ed)33 +b(as:)44 b(an)32 b(in)n(teger)f(v)-5 b(ariable)31 b Fg(\000)p +Fl(1)e Fg(\024)h Fh(itr)r(ace)g Fg(\024)g Fl(99;)j(if)f +Fh(itr)r(ace)e(>)g Fg(\000)p Fl(1,)1129 3811 y(it)d(is)f(in)n +(terpreted)f(as)h(a)g(F)n(OR)-7 b(TRAN)26 b(unit)h(n)n(um)n(b)r(er,)f +(and)g(is)g(used)h(to)f(prin)n(t)g(a)1129 3910 y(summary)h(of)g(the)h +(con)n(v)n(ergence)e(parameters)f(at)j(eac)n(h)f(iteration.)946 +4043 y Fm(ml)40 b Fl(The)28 b(\(L\))g(parameter.)1129 +4143 y(Scop)r(e:)37 b Fm(global)1129 4242 y Fl(T)n(yp)r(e:)g +Fm(optional)1129 4342 y Fl(Default:)h Fh(ml)24 b Fl(=)f(1.)1129 +4442 y(Sp)r(eci\014ed)28 b(as:)37 b(an)27 b(in)n(teger)g(v)-5 +b(ariable)26 b Fh(ml)e Fg(\025)f Fl(1)946 4575 y Fm(On)32 +b(Return)946 4707 y(x)42 b Fl(The)28 b(computed)g(solution.)1129 +4807 y(Scop)r(e:)37 b Fm(lo)s(cal)1129 4907 y Fl(T)n(yp)r(e:)g +Fm(required)1129 5006 y Fl(Sp)r(eci\014ed)28 b(as:)37 +b(a)27 b(rank)f(one)i(arra)n(y)-7 b(.)2130 5255 y(88)p +eop +%%Page: 89 89 +89 88 bop 498 523 a Fm(iter)41 b Fl(The)28 b(n)n(um)n(b)r(er)f(of)h +(iterations)f(p)r(erformed.)681 623 y(Scop)r(e:)37 b +Fm(global)681 722 y Fl(T)n(yp)r(e:)g Fm(optional)681 +822 y Fl(Returned)28 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable.)498 955 y Fm(err)42 b Fl(The)28 b(error)d(estimate)j(on)f +(exit.)681 1054 y(Scop)r(e:)37 b Fm(global)681 1154 y +Fl(T)n(yp)r(e:)g Fm(optional)681 1254 y Fl(Returned)28 +b(as:)36 b(a)27 b(real)g(n)n(um)n(b)r(er.)498 1386 y +Fm(ierr)42 b Fl(An)28 b(error)d(co)r(de.)681 1486 y(Scop)r(e:)37 +b Fm(global)681 1586 y Fl(T)n(yp)r(e:)g Fm(optional)681 +1685 y Fl(Returned)28 b(as:)36 b(an)27 b(in)n(teger)g(v)-5 +b(ariable:)681 1818 y Fg(\000)p Fl(1)40 b(In)n(v)-5 b(alid)28 +b(preconditioner;)681 1934 y(0)41 b(Normal)27 b(termination;)681 +2051 y Fh(>)22 b Fl(0)41 b(Metho)r(d)34 b(did)f(not)g(con)n(v)n(erge;)g +(set)g(to)g(the)g(n)n(um)n(b)r(er)g(of)f(iterations)g(p)r(er-)836 +2150 y(formed.)1681 5255 y(89)p eop +%%Page: 90 90 +90 89 bop 739 1187 a Fn(Related)78 b(Publications)840 +1619 y Fl(1.)41 b(Du\013,)c(I.,)f(Marrone,)e(M.,)i(Radicati,)f(G.,)h +(and)e(Vittoli,)i(C.)f Ff(L)l(evel)h(3)g(b)l(asic)g(line)l(ar)946 +1719 y(algebr)l(a)23 b(subpr)l(o)l(gr)l(ams)f(for)g(sp)l(arse)g(matric) +l(es:)35 b(a)22 b(user)f(level)h(interfac)l(e)e Fl(A)n(CM)e(T)-7 +b(rans.)946 1818 y(Math.)37 b(Soft)n(w.)28 b(23,)f(3)g(\(Septem)n(b)r +(er\),)h(379{401,)d(1997.)840 1984 y(2.)41 b(Filipp)r(one,)j(S.)c(and)f +(Cola)5 b(janni,)42 b(M.,)h Ff(PSBLAS:)e(a)g(libr)l(ary)i(for)e(p)l(ar) +l(al)t(lel)i(lib)l(e)l(ar)946 2084 y(algebr)l(a)32 b(c)l(omputation)e +(on)g(sp)l(arse)h(matric)l(es)p Fl(,)d(A)n(CM)g(T)-7 +b(rans.)36 b(Math.)i(Soft)n(w.)28 b(26,)f(4)946 2183 +y(\(Decem)n(b)r(er\),)i(2000.)2130 5255 y(90)p eop +%%Trailer +end +userdict /end-hook known{end-hook}if +%%EOF diff --git a/lib/comm.fh b/lib/comm.fh new file mode 100644 index 00000000..40f6cbde --- /dev/null +++ b/lib/comm.fh @@ -0,0 +1,5 @@ + integer, parameter :: nohalo_=0, halo_=4 + integer, parameter :: none_=0,sum_=1,avg_=2,square_root_=3 + integer, parameter :: swap_send=1, swap_recv=2 + integer, parameter :: swap_sync=4,swap_mpi=8 + character, parameter :: all='A',topdef=' ' diff --git a/lib/const.fh b/lib/const.fh new file mode 100644 index 00000000..20299fca --- /dev/null +++ b/lib/const.fh @@ -0,0 +1,6 @@ + integer, parameter :: deadlock_check=0,local_mtrx_check=1 + integer, parameter :: local_comm_check=2,consistency_check=3 + integer, parameter :: global_check=4,order_communication=5 + integer, parameter :: change_represent=6,loc_to_glob_check=7 + integer, parameter :: convert_halo=1,convert_ovrlap=2 + integer, parameter :: act_ret=0, act_abort=1, no_err=0 diff --git a/lib/desc.fh b/lib/desc.fh new file mode 100644 index 00000000..4bd009b3 --- /dev/null +++ b/lib/desc.fh @@ -0,0 +1,13 @@ + integer, parameter :: dec_type_=1,m_=2,n_=3 + integer, parameter :: n_row_=4,n_col_=5,ctxt_=6 + integer, parameter :: loc_to_glob_=7,mpi_c_=9,mdata_size=10 + integer, parameter :: desc_asb=3099, desc_bld=desc_asb+1 + integer, parameter :: desc_upd=desc_bld+1 + integer, parameter :: desc_upd_asb=desc_upd+1 + integer, parameter :: upd_glb=998, upd_loc=997 + integer, parameter :: proc_id_=0,n_elem_recv_=1 + integer, parameter :: elem_recv_=2,n_elem_send_=2 + integer, parameter :: elem_send_=3,n_ovrlp_elem_=1 + integer, parameter :: ovrlp_elem_to_=2 + integer, parameter :: ovrlp_elem_=0, n_dom_ovr_=1 + diff --git a/lib/f90blacs.mod b/lib/f90blacs.mod new file mode 100644 index 00000000..bf08a02c Binary files /dev/null and b/lib/f90blacs.mod differ diff --git a/lib/parts.f90 b/lib/parts.f90 new file mode 100644 index 00000000..10e323d0 --- /dev/null +++ b/lib/parts.f90 @@ -0,0 +1,7 @@ +interface + !.....user passed subroutine..... + subroutine parts(glob_index,nrow,np,pv,nv) + integer, intent (in) :: glob_index,np,nrow + integer, intent (out) :: nv, pv(*) + end subroutine parts +end interface diff --git a/lib/psb_comm_mod.mod b/lib/psb_comm_mod.mod new file mode 100644 index 00000000..119713e8 Binary files /dev/null and b/lib/psb_comm_mod.mod differ diff --git a/lib/psb_const.fh b/lib/psb_const.fh new file mode 100644 index 00000000..71015db6 --- /dev/null +++ b/lib/psb_const.fh @@ -0,0 +1,38 @@ + integer, parameter :: psb_nohalo_=0, psb_halo_=4 + integer, parameter :: psb_none_=0,psb_sum_=1 + integer, parameter :: psb_avg_=2,psb_square_root_=3 + integer, parameter :: psb_swap_send_=1,psb_swap_recv_=2 + integer, parameter :: psb_swap_sync_=4,psb_swap_mpi_=8 + integer, parameter :: psb_deadlock_check_=0 + integer, parameter :: psb_local_mtrx_check_=1 + integer, parameter :: psb_local_comm_check_=2 + integer, parameter :: psb_consistency_check_=3 + integer, parameter :: psb_global_check_=4 + integer, parameter :: psb_order_communication_=5 + integer, parameter :: psb_change_represent_=6 + integer, parameter :: psb_loc_to_glob_check_=7 + integer, parameter :: psb_convert_halo_=1 + integer, parameter :: psb_convert_ovrlap_=2 + integer, parameter :: psb_act_ret_=0 + integer, parameter :: psb_act_abort_=1, no_err_=0 + integer, parameter :: psb_dec_type_=1,psb_m_=2,psb_n_=3 + integer, parameter :: psb_n_row_=4,psb_n_col_=5,psb_ctxt_=6 + integer, parameter :: psb_loc_to_glob_=7 + integer, parameter :: psb_mpi_c_=9,psb_mdata_size_=10 + integer, parameter :: psb_desc_asb_=3099 + integer, parameter :: psb_desc_bld_=psb_desc_asb_+1 + integer, parameter :: psb_desc_upd_=psb_desc_bld_+1 + integer, parameter :: psb_desc_upd_asb_=psb_desc_upd_+1 + integer, parameter :: psb_upd_glb_=998,psb_upd_loc_=997 + integer, parameter :: psb_proc_id_=0,psb_n_elem_recv_=1 + integer, parameter :: psb_elem_recv_=2,psb_n_elem_send_=2 + integer, parameter :: psb_elem_send_=3,psb_n_ovrlp_elem_=1 + integer, parameter :: psb_ovrlp_elem_to_=2,psb_ovrlp_elem_=0 + integer, parameter :: psb_nnz_=1, psb_n_dom_ovr_=1 + integer, parameter :: psb_no_comm_=-1, psb_nzsizereq_=3 + integer, parameter :: ione=1, done=1.d0,izero=0, dzero=0.d0 + integer, parameter :: itwo=2, ithree=3,root=0, act_abort=1 + integer, parameter :: psb_nztotreq_=1,psb_nzrowreq_=2 + character, parameter :: psb_all_='A',psb_topdef_=' ' + + diff --git a/lib/psb_const_mod.mod b/lib/psb_const_mod.mod new file mode 100644 index 00000000..95fa30ce Binary files /dev/null and b/lib/psb_const_mod.mod differ diff --git a/lib/psb_desc_mod.mod b/lib/psb_desc_mod.mod new file mode 100644 index 00000000..40d2e2c6 Binary files /dev/null and b/lib/psb_desc_mod.mod differ diff --git a/lib/psb_descriptor_type.mod b/lib/psb_descriptor_type.mod new file mode 100644 index 00000000..1b99628d Binary files /dev/null and b/lib/psb_descriptor_type.mod differ diff --git a/lib/psb_error_mod.mod b/lib/psb_error_mod.mod new file mode 100644 index 00000000..9783fd64 Binary files /dev/null and b/lib/psb_error_mod.mod differ diff --git a/lib/psb_methd_mod.mod b/lib/psb_methd_mod.mod new file mode 100644 index 00000000..097c526a Binary files /dev/null and b/lib/psb_methd_mod.mod differ diff --git a/lib/psb_parts.mod b/lib/psb_parts.mod new file mode 100644 index 00000000..ba7b3052 Binary files /dev/null and b/lib/psb_parts.mod differ diff --git a/lib/psb_parts_mod.mod b/lib/psb_parts_mod.mod new file mode 100644 index 00000000..e62a4a4b Binary files /dev/null and b/lib/psb_parts_mod.mod differ diff --git a/lib/psb_prec_mod.mod b/lib/psb_prec_mod.mod new file mode 100644 index 00000000..7b7c2dad Binary files /dev/null and b/lib/psb_prec_mod.mod differ diff --git a/lib/psb_prec_type.mod b/lib/psb_prec_type.mod new file mode 100644 index 00000000..529f469b Binary files /dev/null and b/lib/psb_prec_type.mod differ diff --git a/lib/psb_psblas_mod.mod b/lib/psb_psblas_mod.mod new file mode 100644 index 00000000..a758197a Binary files /dev/null and b/lib/psb_psblas_mod.mod differ diff --git a/lib/psb_realloc_mod.mod b/lib/psb_realloc_mod.mod new file mode 100644 index 00000000..6abd2daa Binary files /dev/null and b/lib/psb_realloc_mod.mod differ diff --git a/lib/psb_serial_mod.mod b/lib/psb_serial_mod.mod new file mode 100644 index 00000000..b9213389 Binary files /dev/null and b/lib/psb_serial_mod.mod differ diff --git a/lib/psb_spmat_type.mod b/lib/psb_spmat_type.mod new file mode 100644 index 00000000..302def40 Binary files /dev/null and b/lib/psb_spmat_type.mod differ diff --git a/lib/psb_string_mod.mod b/lib/psb_string_mod.mod new file mode 100644 index 00000000..34af5fdc Binary files /dev/null and b/lib/psb_string_mod.mod differ diff --git a/lib/psb_tools_mod.mod b/lib/psb_tools_mod.mod new file mode 100644 index 00000000..6579e8c1 Binary files /dev/null and b/lib/psb_tools_mod.mod differ diff --git a/lib/psblas.fh b/lib/psblas.fh new file mode 100644 index 00000000..01d7e51b --- /dev/null +++ b/lib/psblas.fh @@ -0,0 +1,3 @@ + include 'desc.fh' + include 'comm.fh' + include 'const.fh' diff --git a/lib/psblas.h b/lib/psblas.h new file mode 100644 index 00000000..5bc24c9c --- /dev/null +++ b/lib/psblas.h @@ -0,0 +1,819 @@ +/* --------------------------------------------------------------------- +* +* -- PSBLAS routine (version 1.0) -- +* +* --------------------------------------------------------------------- +*/ + +/* +* This file includes the standard C libraries, as well as system +* dependent include files. All PSBLAS routines include this file. +*/ +#include + +#ifndef PSBLASH +#define PSBLASH +/* +* ======================================================================== +* Machine Specific PBLAS macros +* ======================================================================== +*/ +/* This is a debugging option. + #define PS_CONTROL_LEVEL */ + +#define _HAL_ 0 +#define _T3D_ 1 + +#ifdef T3D +#define _MACH_ _T3D_ +#endif + +#ifndef _MACH_ +#define _MACH_ _HAL_ +#endif + +/* +* ======================================================================== +* Include files +* ======================================================================== +*/ +#include +#include +#include + +#if( _MACH_ == _T3D_ ) +#include +#endif + +#ifdef USE_FBLACS +#ifndef CTOF_BLACS +#include "ctof_blacs.h" +#endif +#endif + + + +/* +* ======================================================================== +* FORTRAN <-> C interface +* ======================================================================== +* +* These macros define how the PBLAS will be called. _F2C_ADD_ assumes +* that they will be called by FORTRAN, which expects C routines to have +* an underscore postfixed to the name (Suns, and Intel machines expect +* this). _F2C_NOCHANGE indicates that FORTRAN will be calling, and that +* it expects the name called by FORTRAN to be identical to that compiled +* by the C (RS6K's do this). _F2C_UPCASE says it expects C routines +* called by FORTRAN to be in all upcase (CRAY wants this). +*/ + +#define _F2C_ADD_ 0 +#define _F2C_NOCHANGE 1 +#define _F2C_UPCASE 2 + +#ifdef UpCase +#define _F2C_CALL_ _F2C_UPCASE +#endif + +#ifdef NoChange +#define _F2C_CALL_ _F2C_NOCHANGE +#endif + +#ifdef Add_ +#define _F2C_CALL_ _F2C_ADD_ +#endif + +#ifndef _F2C_CALL_ +#define _F2C_CALL_ _F2C_ADD_ +#endif + +/* +* ======================================================================== +* TYPE DEFINITIONS AND CONVERSION UTILITIES +* ======================================================================== +*/ + +typedef struct { float re, im; } complex; +typedef struct { double re, im; } complex16; + +#if( _MACH_ == _T3D_ ) + /* Type of character argument in a FORTRAN call */ +#define F_CHAR _fcd + /* Character conversion utilities */ +#define F2C_CHAR(a) ( _fcdtocp( (a) ) ) +#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) + /* Type of FORTRAN functions */ +#define F_VOID_FCT void fortran /* Subroutine */ +#define F_INTG_FCT int fortran /* INTEGER function */ +#define F_DBLE_FCT double fortran /* DOUBLE PRECISION function */ + +#else + /* Type of character argument in a FORTRAN call */ +typedef char * F_CHAR; + /* Character conversion utilities */ +#define F2C_CHAR(a) (a) +#define C2F_CHAR(a) (a) + /* Type of FORTRAN functions */ +#define F_VOID_FCT void /* Subroutine */ +#define F_INTG_FCT int /* INTEGER function */ +#define F_DBLE_FCT double /* DOUBLE PRECISION function */ + +#endif + +/* +* ====================================================================== +* FUNCTIONS PROTOTYPES +* ====================================================================== +*/ +void DVSct(int n, int k,int idx[],int flag, double X[], int lx, + double beta, double Y[], int ly); +void DVGth(int n, int k,int idx[],int flag, double X[], int lx,double Y[], int ly); +void IVSct(int n, int k,int idx[],int flag, int X[], int lx, + int beta, int Y[], int ly); +void IVGth(int n, int k,int idx[],int flag, int X[], int lx,int Y[], int ly); + +void PSI_dSwapData(int iflag, int n, double beta, double Y[], int ly, + int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_dSwapTran(int flag, int n, double beta, double Y[], int ly, + int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_zSwapData(int n, double Y[], int ly, int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_zSwapOverlap(double Y[], double Sum_Ovrlap[], int desc_data[], + int desc_ovrlap[], double work[], int *lwork, int *ierror); +void PSI_iSwapData(int iflag, int n, int beta, int Y[], int ly, + int desc_data[], int desc_halo[], + int *work, int *lwork, int *ierror); + +void PSI_iSwapTran(int flag, int n, int beta, int Y[], int ly, + int desc_data[], int desc_halo[], + int *work, int *lwork, int *ierror); + +/* +* ======================================================================== +* #DEFINE MACRO CONSTANTS +* ======================================================================== +*/ +/* MACRO max */ +#define max(x,y) ((x)>(y)?(x):(y)) + +/*MACRO for ovrlap update*/ +#define NOHALO_ 0 +#define HALO_ 4 +#define NONE_ 0 +#define SUM_ 1 +#define AVG_ 2 +#define SQUARE_ROOT_ 3 + +/* Bit fields to control swapdata/ovrlap behaviour. + BEWARE: check consistency with tools_const.f. + Should it be automated? */ +#define SWAP_SEND 1 +#define SWAP_RECV 2 +#define SWAP_SYNC 4 +#define SWAP_MPI 8 + + +/* Macro for MATRIX_DATA array */ +#define DEC_TYPE_ 0 /* The type of decomposition of global + matrix A. */ +#define M_ 1 /* Number of equations */ +#define N_ 2 /* Number of variables */ +#define N_ROW_ 3 /* The number of row of local matrix. */ +#define N_COL_ 4 /* The number of columns of local + matrix. */ +#define CTXT_ 5 /* The BLACS context handle, indicating + the global context of the operation + on the matrix. + The context itself is global. */ +#define LOC_TO_GLOB_ 6 /* The pointer to the array + loc_to_glob */ +#define MPI_C_ 8 /* The MPI Fortran handle */ +/* values for DEC_TYPE_ */ +#define DESC_ASB 3099 +#define DESC_BLD (DESC_ASB+1) + +/* Macro for HALO array */ +#define PROC_ID_ 0 /* The identifier of domain. */ +#define N_ELEM_RECV_ 1 /* The number of elements to receive*/ +#define ELEM_RECV_ 2 /* The first index of local elements */ +#define N_ELEM_SEND_ 2 /* The number of elements to send */ +#define ELEM_SEND_ 3 /* The first index of local elements */ + +/* Macro for OVERLAP array */ +#define N_OVRLP_ELEM_ 1 /* The number of overlap elements to recv/send */ +#define OVRLP_ELEM_TO_ 2 /* The first index of local elements */ + +/* Macro for OVR_ELEM_D array */ +#define OVRLP_ELEM_ 0 +#define N_DOM_OVR_ 1 + +#define BROADCAST "B" /* Blacs operation definitions */ +#define COMBINE "C" + +#define ALL "A" /* Scope definitions */ +#define COLUMN "C" +#define ROW "R" + +#define TOPDEF " " /* Default BLACS topology, PB-BLAS routines */ +#define CTOPDEF ' ' +#define TOPGET "!" + +#define YES "Y" +#define NO "N" + +#define MULLENFAC 2 + +#define ONE 1.0 +#define ZERO 0.0 + +/* Integer values for error checking */ +#define no_err 0 +#define act_ret 0 +#define act_abort 1 + + +/* +* ======================================================================== +* PREPROCESSOR MACRO FUNCTIONS USED FOR OPTIMIZATION & CONVENIENCE +* ======================================================================== +*/ + +#define ABS(a) ((a > 0) ? (a) : (-a)) + +#define MIN(a,b) ((a < b) ? (a) : (b)) + +#define MAX(a,b) ((a > b) ? (a) : (b)) + +#define CEIL(a,b) ( (a+b-1) / (b) ) + +#define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) ) + +#define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) ) + +#define INDXG2L( iglob, nb, iproc, isrcproc, nprocs )\ + ( (nb) * ( ( (iglob)-1) / ( (nb) * (nprocs) ) ) +\ + ( ( (iglob) - 1 ) % (nb) ) + 1 ) + +#define INDXL2G( iloc, nb, iproc, isrcproc, nprocs )\ + ( (nprocs) * (nb) * ( ( (iloc) - 1 ) / (nb) ) +\ + ( ( (iloc) - 1 ) % (nb) ) +\ + ( ( (nprocs) + (iproc) - (isrcproc) ) % (nprocs) ) * (nb) + 1 ) + +#define INDXG2P( iglob, nb, iproc, isrcproc, nprocs ) \ + ( ( (isrcproc) + ( (iglob) - 1 ) / (nb) ) % (nprocs) ) + +#define MYROC0( nblocks, n, nb, nprocs )\ + ( ( (nblocks) % (nprocs) ) ? ( ( (nblocks) / (nprocs) ) * (nb) + (nb) )\ + : ( ( (nblocks) / (nprocs) )* (nb) + ( (n) % (nb) ) ) ) + +#if( _F2C_CALL_ == _F2C_ADD_ ) +/* +* These defines set up the naming scheme required to have a FORTRAN +* routine call a C routine (which is what the PBLAS are written in). +* No redefinition necessary to have following FORTRAN to C interface: +* FORTRAN CALL C DECLARATION +* call pdgemm(...) void pdgemm_(...) +* +* This is the default. +*/ +#define dcsmm dcsmm_ +#define dcssm dcssm_ +#define dcsnmi dcsnmi_ +#define idamax idamax_ +#define izamax izamax_ +#define ddot ddot_ +#define dasum dasum_ +#define daxpby daxpby_ +#define dscal dscal_ +#define zcsmm zcsmm_ +#define zcssm zcssm_ +#define zcsnmi zcsnmi_ +#define zdot zdot_ +#define dzasum dzasum_ +#define zaxpby zaxpby_ +#define zscal zscal_ +#define pbchkvectf pbchkvectf_ +#define fcpsb_errcomm fcpsb_errcomm_ +#define fcpsb_erractionsave fcpsb_erractionsave_ +#define fcpsb_erractionrestore fcpsb_erractionrestore_ +#define fcpsb_perror fcpsb_perror_ +#define fcpsb_serror fcpsb_serror_ +#define fcpsb_errpush fcpsb_errpush_ +#endif + +#if( _F2C_CALL_ == _F2C_UPCASE ) +/* +* These defines set up the naming scheme required to have a FORTRAN +* routine call a C routine (which is what the PBLAS are written in) +* following FORTRAN to C interface: +* FORTRAN CALL C DECLARATION +* call pdgemm(...) void PDGEMM(...) +*/ +#define pbchkvectf PBCHKVECTF /* PSBLAS */ +#define psddot_ PSDDOT +#define psdmdot_ PSDMDOT +#define psddot_sub_ PSDDOT_SUB +#define psdaxpby_ PSDAXPBY +#define psdamax_ PSDAMAX +#define psdmamax_ PSDMAMAX +#define psdasum_ PSDASUM +#define psdnrm2_ PSDNRM2 +#define psdnrmi_ PSDNRMI +#define psdnrmisym_ PSDNRMISYM +#define psdhalo_ PSDHALO +#define psihalo_ PSIHALO +#define psdhred_ PSDHRED +#define psdovrl_ PSDOVRL +#define psdspmm_ PSDSPMM +#define psdswaptran_ PSDSWAPTRAN +#define psdspmmsym_ PSDSPMMSYM +#define psdspsm_ PSDSPSM +#define psderror_ PSDERROR +#define psdverify_ PSDVERIFY +#define psdscatterm_ PSDSCATTERM +#define psdgatherm PSDGATHERM + /* PSBLAS */ +#define pszdotc_ PSZDOTC +#define pszdotu_ PSZDOTU +#define pszmdot_ PSZMDOT +#define pszaxpby_ PSZAXPBY +#define pszamax_ PSZAMAX +#define pszmamax_ PSZMAMAX +#define pszasum_ PSZASUM +#define psznrm2_ PSZNRM2 +#define psznrmi_ PSZNRMI +#define psznrmisym_ PSZNRMISYM +#define pszhalo_ PSZHALO +#define pszovrl_ PSZOVRL +#define pszspmm_ PSZSPMM +#define pszspmmsym_ PSZSPMMSYM +#define pszspsm_ PSZSPSM +#define pszerror_ PSZERROR +#define pszverify_ PSZVERIFY +#define pszscatterm_ PSZSCATTERM +#define pszgatherm_ PSZGATHERM + /* BLACS */ +#define blacs_abort_ BLACS_ABORT +#define blacs_gridinfo_ BLACS_GRIDINFO + +#define igesd2d_ IGESD2D +#define igebs2d_ IGEBS2D +#define itrsd2d_ ITRSD2D +#define itrbs2d_ ITRBS2D +#define igerv2d_ IGERV2D +#define igebr2d_ IGEBR2D +#define itrrv2d_ ITRRV2D +#define itrbr2d_ ITRBR2D +#define igamx2d_ IGAMX2D +#define igamn2d_ IGAMN2D +#define igsum2d_ IGSUM2D + +#define sgesd2d_ SGESD2D +#define sgebs2d_ SGEBS2D +#define strsd2d_ STRSD2D +#define strbs2d_ STRBS2D +#define sgerv2d_ SGERV2D +#define sgebr2d_ SGEBR2D +#define strrv2d_ STRRV2D +#define strbr2d_ STRBR2D +#define sgamx2d_ SGAMX2D +#define sgamn2d_ SGAMN2D +#define sgsum2d_ SGSUM2D + +#define dgesd2d_ DGESD2D +#define dgebs2d_ DGEBS2D +#define dtrsd2d_ DTRSD2D +#define dtrbs2d_ DTRBS2D +#define dgerv2d_ DGERV2D +#define dgebr2d_ DGEBR2D +#define dtrrv2d_ DTRRV2D +#define dtrbr2d_ DTRBR2D +#define dgamx2d_ DGAMX2D +#define dgamn2d_ DGAMN2D +#define dgsum2d_ DGSUM2D + +#define cgesd2d_ CGESD2D +#define cgebs2d_ CGEBS2D +#define ctrsd2d_ CTRSD2D +#define ctrbs2d_ CTRBS2D +#define cgerv2d_ CGERV2D +#define cgebr2d_ CGEBR2D +#define ctrrv2d_ CTRRV2D +#define ctrbr2d_ CTRBR2D +#define cgamx2d_ CGAMX2D +#define cgamn2d_ CGAMN2D +#define cgsum2d_ CGSUM2D + +#define zgesd2d_ ZGESD2D +#define zgebs2d_ ZGEBS2D +#define ztrsd2d_ ZTRSD2D +#define ztrbs2d_ ZTRBS2D +#define zgerv2d_ ZGERV2D +#define zgebr2d_ ZGEBR2D +#define ztrrv2d_ ZTRRV2D +#define ztrbr2d_ ZTRBR2D +#define zgamx2d_ ZGAMX2D +#define zgamn2d_ ZGAMN2D +#define zgsum2d_ ZGSUM2D + /* Level-1 BLAS */ +#define srotg_ SROTG +#define srotmg_ SROTMG +#define srot_ SROT +#define srotm_ SROTM +#define sswap_ SSWAP +#define sscal_ SSCAL +#define scopy_ SCOPY +#define saxpy_ SAXPY +#define ssdot_ SSDOT +#define isamax_ ISAMAX + +#define drotg_ DROTG +#define drotmg_ DROTMG +#define drot_ DROT +#define drotm_ DROTM +#define dswap_ DSWAP +#define dscal_ DSCAL +#define dcopy_ DCOPY +#define daxpy_ DAXPY +#define dddot_ DDDOT +#define dnrm2_ DNRM2 +#define dsnrm2_ DSNRM2 +#define dasum_ DASUM +#define dsasum_ DSASUM +#define idamax_ IDAMAX +#define daxpby_ DAXPBY + +#define zaxpby_ ZAXPBY /* to match added internal function */ + +#define cswap_ CSWAP +#define cscal_ CSCAL +#define csscal_ CSSCAL +#define ccopy_ CCOPY +#define caxpy_ CAXPY +#define ccdotu_ CCDOTU +#define ccdotc_ CCDOTC +#define icamax_ ICAMAX + +#define zswap_ ZSWAP +#define zscal_ ZSCAL +#define zdscal_ ZDSCAL +#define zcopy_ ZCOPY +#define zaxpy_ ZAXPY +#define zzdotu_ ZZDOTU +#define zzdotc_ ZZDOTC +#define dscnrm2_ DSCNRM2 +#define dznrm2_ DZNRM2 +#define dscasum_ DSCASUM +#define dzasum_ DZASUM +#define izamax_ IZAMAX + /* Level-2 BLAS */ +#define sgemv_ SGEMV +#define ssymv_ SSYMV +#define strmv_ STRMV +#define strsv_ STRSV +#define sger_ SGER +#define ssyr_ SSYR +#define ssyr2_ SSYR2 + +#define dgemv_ DGEMV +#define dsymv_ DSYMV +#define dtrmv_ DTRMV +#define dtrsv_ DTRSV +#define dger_ DGER +#define dsyr_ DSYR +#define dsyr2_ DSYR2 + +#define cgemv_ CGEMV +#define chemv_ CHEMV +#define ctrmv_ CTRMV +#define ctrsv_ CTRSV +#define cgeru_ CGERU +#define cgerc_ CGERC +#define cher_ CHER +#define cher2_ CHER2 + +#define zgemv_ ZGEMV +#define zhemv_ ZHEMV +#define ztrmv_ ZTRMV +#define ztrsv_ ZTRSV +#define zgeru_ ZGERU +#define zgerc_ ZGERC +#define zher_ ZHER +#define zher2_ ZHER2 + /* Level-3 BLAS */ +#define sgemm_ SGEMM +#define ssymm_ SSYMM +#define ssyrk_ SSYRK +#define ssyr2k_ SSYR2K +#define strmm_ STRMM +#define strsm_ STRSM + +#define dgemm_ DGEMM +#define dsymm_ DSYMM +#define dsyrk_ DSYRK +#define dsyr2k_ DSYR2K +#define dtrmm_ DTRMM +#define dtrsm_ DTRSM + +#define cgemm_ CGEMM +#define chemm_ CHEMM +#define csymm_ CSYMM +#define csyrk_ CSYRK +#define cherk_ CHERK +#define csyr2k_ CSYR2K +#define cher2k_ CHER2K +#define ctrmm_ CTRMM +#define ctrsm_ CTRSM + +#define zgemm_ ZGEMM +#define zhemm_ ZHEMM +#define zsymm_ ZSYMM +#define zsyrk_ ZSYRK +#define zherk_ ZHERK +#define zsyr2k_ ZSYR2K +#define zher2k_ ZHER2K +#define ztrmm_ ZTRMM +#define ztrsm_ ZTRSM + /* Auxilliary PBLAS */ +#define pberror_ PBERROR +#define pbfreebuf_ PBFREEBUF + +#define dcsmm DCSMM +#define dcssm DCSSM +#define dcsnmi DCSNMI +#define zcsnmi ZCSNMI + +#endif + +#if( _F2C_CALL_ == _F2C_NOCHANGE ) +/* +* These defines set up the naming scheme required to have a FORTRAN +* routine call a C routine (which is what the PBLAS are written in) +* for following FORTRAN to C interface: +* FORTRAN CALL C DECLARATION +* call pdgemm(...) void pdgemm(...) +*/ + + + /* PSBLAS */ +#define psddot_ psddot +#define psdmdot_ psdmdot +#define psdaxpby_ psdaxpby +#define psdamax_ psdamax +#define psdmamax_ psdmamax +#define psdasum_ psdasum +#define psdnrm2_ psdnrm2 +#define psdnrmi_ psdnrmi +#define psdnrmisym_ psdnrmisym +#define psdhalo_ psdhalo +#define psihalo_ psihalo +#define psdhred_ psdhred +#define psdovrl_ psdovrl +#define psdspmm_ psdspmm +#define psdswaptran_ psdswaptran +#define psdspmmsym_ psdspmmsym +#define psdspsm_ psdspsm +#define psderror_ psderror +#define psdverify_ psdverify +#define psdscatterm_ psdscatterm +#define psdgatherm_ psdgatherm + +#define pszmdot_ pszmdot +#define pszdotc_ pszdotc +#define pszdotu_ pszdotu +#define pszaxpby_ pszaxpby +#define pszamax_ pszamax +#define pszmamax_ pszmamax +#define pszasum_ pszasum +#define psznrm2_ psznrm2 +#define psznrmi_ psznrmi +#define psznrmisym_ psznrmisym +#define pszhalo_ pszhalo +#define pszovrl_ pszovrl +#define pszspmm_ pszspmm +#define pszspmmsym_ pszspmmsym +#define pszspsm_ pszspsm +#define pszerror_ pszerror +#define pszverify_ pszverify +#define pszscatterm_ pszscatterm +#define pszgatherm_ pszgatherm + + + /* BLACS */ +#define blacs_abort_ blacs_abort +#define blacs_gridinfo_ blacs_gridinfo + +#define igesd2d_ igesd2d +#define igebs2d_ igebs2d +#define itrsd2d_ itrsd2d +#define itrbs2d_ itrbs2d +#define igerv2d_ igerv2d +#define igebr2d_ igebr2d +#define itrrv2d_ itrrv2d +#define itrbr2d_ itrbr2d +#define igamx2d_ igamx2d +#define igamn2d_ igamn2d +#define igsum2d_ igsum2d + +#define sgesd2d_ sgesd2d +#define sgebs2d_ sgebs2d +#define strsd2d_ strsd2d +#define strbs2d_ strbs2d +#define sgerv2d_ sgerv2d +#define sgebr2d_ sgebr2d +#define strrv2d_ strrv2d +#define strbr2d_ strbr2d +#define sgamx2d_ sgamx2d +#define sgamn2d_ sgamn2d +#define sgsum2d_ sgsum2d + +#define dgesd2d_ dgesd2d +#define dgebs2d_ dgebs2d +#define dtrsd2d_ dtrsd2d +#define dtrbs2d_ dtrbs2d +#define dgerv2d_ dgerv2d +#define dgebr2d_ dgebr2d +#define dtrrv2d_ dtrrv2d +#define dtrbr2d_ dtrbr2d +#define dgamx2d_ dgamx2d +#define dgamn2d_ dgamn2d +#define dgsum2d_ dgsum2d + +#define cgesd2d_ cgesd2d +#define cgebs2d_ cgebs2d +#define ctrsd2d_ ctrsd2d +#define ctrbs2d_ ctrbs2d +#define cgerv2d_ cgerv2d +#define cgebr2d_ cgebr2d +#define ctrrv2d_ ctrrv2d +#define ctrbr2d_ ctrbr2d +#define cgamx2d_ cgamx2d +#define cgamn2d_ cgamn2d +#define cgsum2d_ cgsum2d + +#define zgesd2d_ zgesd2d +#define zgebs2d_ zgebs2d +#define ztrsd2d_ ztrsd2d +#define ztrbs2d_ ztrbs2d +#define zgerv2d_ zgerv2d +#define zgebr2d_ zgebr2d +#define ztrrv2d_ ztrrv2d +#define ztrbr2d_ ztrbr2d +#define zgamx2d_ zgamx2d +#define zgamn2d_ zgamn2d +#define zgsum2d_ zgsum2d + /* Level-1 BLAS */ +#define srotg_ srotg +#define srotmg_ srotmg +#define srot_ srot +#define srotm_ srotm +#define sswap_ sswap +#define sscal_ sscal +#define scopy_ scopy +#define saxpy_ saxpy +#define ssdot_ ssdot +#define isamax_ isamax + +#define drotg_ drotg +#define drotmg_ drotmg +#define drot_ drot +#define drotm_ drotm +#define dswap_ dswap +#define dscal_ dscal +#define dcopy_ dcopy +#define daxpy_ daxpy +#define dddot_ dddot +#define dnrm2_ dnrm2 +#define dsnrm2_ dsnrm2 +#define dasum_ dasum +#define dsasum_ dsasum +#define idamax_ idamax +#define daxpby_ daxpby + +#define zaxpby_ zaxpby + +#define cswap_ cswap +#define cscal_ cscal +#define csscal_ csscal +#define ccopy_ ccopy +#define caxpy_ caxpy +#define ccdotu_ ccdotu +#define ccdotc_ ccdotc +#define icamax_ icamax + +#define zswap_ zswap +#define zscal_ zscal +#define zdscal_ zdscal +#define zcopy_ zcopy +#define zaxpy_ zaxpy +#define zzdotu_ zzdotu +#define zzdotc_ zzdotc +#define dscnrm2_ dscnrm2 +#define dznrm2_ dznrm2 +#define dscasum_ dscasum +#define dzasum_ dzasum +#define izamax_ izamax + /* Level-2 BLAS */ +#define sgemv_ sgemv +#define ssymv_ ssymv +#define strmv_ strmv +#define strsv_ strsv +#define sger_ sger +#define ssyr_ ssyr +#define ssyr2_ ssyr2 + +#define dgemv_ dgemv +#define dsymv_ dsymv +#define dtrmv_ dtrmv +#define dtrsv_ dtrsv +#define dger_ dger +#define dsyr_ dsyr +#define dsyr2_ dsyr2 + +#define cgemv_ cgemv +#define chemv_ chemv +#define ctrmv_ ctrmv +#define ctrsv_ ctrsv +#define cgeru_ cgeru +#define cgerc_ cgerc +#define cher_ cher +#define cher2_ cher2 + +#define zgemv_ zgemv +#define zhemv_ zhemv +#define ztrmv_ ztrmv +#define ztrsv_ ztrsv +#define zgeru_ zgeru +#define zgerc_ zgerc +#define zher_ zher +#define zher2_ zher2 + /* Level-3 BLAS */ +#define sgemm_ sgemm +#define ssymm_ ssymm +#define ssyrk_ ssyrk +#define ssyr2k_ ssyr2k +#define strmm_ strmm +#define strsm_ strsm + +#define dgemm_ dgemm +#define dsymm_ dsymm +#define dsyrk_ dsyrk +#define dsyr2k_ dsyr2k +#define dtrmm_ dtrmm +#define dtrsm_ dtrsm + +#define cgemm_ cgemm +#define chemm_ chemm +#define csymm_ csymm +#define csyrk_ csyrk +#define cherk_ cherk +#define csyr2k_ csyr2k +#define cher2k_ cher2k +#define ctrmm_ ctrmm +#define ctrsm_ ctrsm + +#define zgemm_ zgemm +#define zhemm_ zhemm +#define zsymm_ zsymm +#define zsyrk_ zsyrk +#define zherk_ zherk +#define zsyr2k_ zsyr2k +#define zher2k_ zher2k +#define ztrmm_ ztrmm +#define ztrsm_ ztrsm + /* Auxilliary PBLAS */ +#define pberror_ pberror +#define pbfreebuf_ pbfreebuf + +#endif +#endif + + + + + +void pbchkvect( int, int, int, int, int, int, int *, int, int, int *, int *, + int *) ; + +void pbchkmat( int, int, int, int, int, int, int *, int, int, int *, int *, int *); + + + + + + + + + + + + + + + + diff --git a/lib/psi_mod.mod b/lib/psi_mod.mod new file mode 100644 index 00000000..91533e1e Binary files /dev/null and b/lib/psi_mod.mod differ diff --git a/lib/sparker.fh b/lib/sparker.fh new file mode 100644 index 00000000..49b13e54 --- /dev/null +++ b/lib/sparker.fh @@ -0,0 +1,26 @@ + INTEGER MINJDROWS, MAXJDROWS + PARAMETER (MINJDROWS=4, MAXJDROWS=8) + DOUBLE PRECISION PERCENT + INTEGER DBLEINT_ + INTEGER DCMPLXINT_ +C ... This parameter represent sizeof(DOUBLE)/sizeof(INTEGER) ... + PARAMETER (PERCENT=0.7,DBLEINT_=2) + PARAMETER (DCMPLXINT_ = 4) + character fidef*5 + parameter (fidef='CSR') + integer, parameter :: nnz_=1 + integer, parameter :: del_bnd_=6, srtd_=7 + integer, parameter :: state_=8, upd_=9 + integer, parameter :: upd_pnt_=10, ifasize_=10 + integer, parameter :: spmat_null=0, spmat_bld=1 + integer, parameter :: spmat_asb=2, spmat_upd=4 + + integer perm_update + parameter (perm_update=98765) + integer isrtdcoo + parameter (isrtdcoo=98764) + integer ireg_flgs + parameter (ireg_flgs=10) + integer ip2_, iflag_, ipc_, ichk_, nnzt_, zero_ + parameter (ip2_=0, iflag_=2, ichk_=3) + parameter ( nnzt_=4, zero_=5,ipc_=6) diff --git a/littledoc.txt b/littledoc.txt new file mode 100644 index 00000000..d6d57236 --- /dev/null +++ b/littledoc.txt @@ -0,0 +1,118 @@ +L'inizializzazione del sistema prevede, ora, che l'assemblaggio del +descrittore e quello della matrice possano essere eseguiti +indipendentemente. +Durante la sua vita, il descrittore può trovarsi in due differenti +stati: + 1. bld: stato di build. in questo stato è possibile aggiornare il + contenuto del descrittore attraverso la routine psb_dscins. + 2. asb: stato assembled. questo è lo stato della rappresentazione + finale del descrittore ed è raggiunto a valle di una chiamata alla + routine psb_dscasb. + +Durante la sua vita, la matrice può trovarsi in tre differenti +stati: + 1. bld: stato di build. in questo stato è possibile aggiornare il + contenuto dela matrice attraverso la routine psb_spins. + 2. asb: stato assembled. questo è lo stato della rappresentazione + finale della matrice ed è raggiunto a valle di una chiamata alla + routine psb_spasb. + 3. upd: stato di update. è lo stato in cui è possibile (attraverso + una chiamata alla routine psb_spasb) rigenerare la matrice. + + +- Assemblaggio contestuale di matrice e descrittore + + Il procedimento da seguire prevede il seguente ordine di chiamate: + 1. psb_dscall: allocazione del descrittore. Alla fine di questo + step lo stato del descrittore sarà bld + 2. psb_spall: allocazione della matrice. Alla fine di questo + step lo stato della matrice sarà bld + 3. psb_spins: in questo caso sia il descrittore che la matrice + saranno nello stato bld. Quindi la psb_spins invoca la + psb_dscins per portare il descrittore in uno stato pre-asb e + poi effettivamente inserisce i coefficienti nella + matrice (che quindi sarà anch'essa in uno stato + pre-asb). Dunque, nel caso di costruzione/assemblaggio + contestuale di matrise e descrittore, il contenuto del + descrittore è implicitamente aggiornato da questa + chiamata. (nel caso separato bisognerà esplicitamente + prevedere questa fase attraverso una chiamata alla psb_dscins) + 4. psb_dscasb: il descrittore viene assemblato e quindi portato + allo stato asb. + 5. psb_spasb: la matrice viene assemblata e quindi portata + allo stato asb. + + +- Assemblaggio di descrittore e matrice indipendenti + + Il procedimento da seguire per costruire/assemblare il descrittore + prevede il seguente ordine di chiamate: + 1. psb_dscall: allocazione del descrittore. Alla fine di questo + step lo stato del descrittore sarà bld + 2. psb_dscins: il descrittore viene inizializzato a partire dal + pattern di sparsità della matrice e dal partizionamento. Alla + fine di questo step sarà in uno stato pre-asb + 3. psb_dscasb: il descrittore viene assemblato e quindi portato + allo stato asb. + + Il procedimento da seguire per costruire/assemblare la matrice + prevede il seguente ordine di chiamate: + 1. psb_spall: allocazione della matrice. Alla fine di questo + step lo stato della matrice sarà bld + 2. psb_spins: i coefficienti vengono effettivamente inseriti + nella matrice che sarà portata ad uno stato pre-asb. + 3. psb_spasb: la matrice viene assemblata e quindi portata + allo stato asb. + + +- Aggiornamento della matrice + Se il pattern di sparsità della matrice non cambia, la matrice può + essere aggiornata attraverso il seguente procedimento: + 1. psb_sprn: reinizializza la matrice. Alla fine di questo step + la matrice sarà nello stato upd + 2. psb_spins: i coefficienti della matrice vengono reinseriti + 3. psb_spasb: la matrice viene assemblata e riportata nello stato + asb. + + + +La gestione degli errori + + La nuova gestione degli errori prevede la creazione di uno stack di + messaggi di errore che possa consentire di seguire a ritroso la + sequenza di chiamate di routine fino ad arrivare a quella in cui + l'errore è stato rilevato. Tutte le nuove interfacce prevedono un + argomento "info" il quale ritorna un valore > 0 se all'interno della + routine chiamata è stato rilevato un errore. Dunque ogni volta che + si rileva una condizione di errore (o per verifica diretta o perchè + una routine chiamata ha ritornato info>0) occorre mettere l'errore + in cima allo stack per mezzo della routine + psb_errpush(info,name,i_err,a_err) in cui: + info: codice di errore (si veda SRC/F90/errormod.f90 per una + corrispondenza codice-messaggiodierrore) + name: stringa di lunghezza 20 contenente il nome della routine + che invoca la psb_errpush() + i_err: opzionale. E' un array di 5 interi contenente informazioni + aggiuntive per il messaggio di errore (si veda errormod.f90) + a_err: opzionale. E' una stringa di 20 contenente informazioni + aggiuntive per il messaggio di errore (si veda errormod.f90) + + attraverso la routine psb_seterrverbosity si può impostare la + verbosità del messaggio d'errore (se =1 viene stampato solo l'errore + in cima allo stack; se >1 vengono stampati tutti) + + la routine psb_error(ictxt) provoca la stampa degli (dell') errori + (errore) sullo stack ed, eventualmente, stronca il set di processi. + l'argomento ictxt è opzionale: se è assente viene semplicemente + stampato il messaggio d'errore altrimenti viene anche abortita + l'esecuzione di tutti i processi. + + la routine psb_seterraction(action) determina quale azione deve essere + intrapresa a fronte del rilevamento di un errore: + action =0 : la routine in cui è stato rilevato un errore (e quindi + dopo che l'errore stesso sia stato inserito sullo stack) + semplicemente ritorna al chiamante un codice di errore + action =1 : la routine in cui è stato rilevato un errore (e quindi + dopo che l'errore stesso sia stato inserito sullo stack) + prima di ritornare invoca la psb_error (e quindi, può, eventualmente + stroncare l'esecuzione di tutti i processi). diff --git a/notes b/notes new file mode 100644 index 00000000..59719ffc --- /dev/null +++ b/notes @@ -0,0 +1,73 @@ +Struttura: +psblas: +psblas/src: la directory contenente il codice sorgente +psblas/src/comm: contiene tutte le routine preposte allo scambio di + dati +psblas/src/internals: contiene una serie di routine utilizzate per + l'assemblaggio dei descrittori di comunicazione e per + lo scambio di dati (psi_dswap_data e psi_dswap_tran) +psblas/src/methd: contiene l'implementazione dei metodi iterativi +psblas/src/modules: contiene i moduli con le interfacce, le + definizioni di tipi e di costanti +psblas/src/prec: contiene tutte le routine preposte alla generazione e + applicazione dei precondizionatori +psblas/src/psblas: contiene le routine algebriche parallele +psblas/src/serial: contiene l'implementazione seriale di routine + algebriche e ausiliarie +psblas/src/serial/aux: routine ausiliarie (in realtà c'è rimasto be poco) +psblas/src/serial/coo: routine relative al formato coo +psblas/src/serial/csr: routine relative al formato csr +psblas/src/serial/dp: routine per l'assemblaggio e la conversione da + un formato all'altro +psblas/src/serial/f77: si tratta delle routine algebriche. queste + vengono chiamate all'interno delle routine in + psblas/src/psblas. +psblas/src/serial/jad: routine relative al formato jad +psblas/src/tools: tutte le routine per la generazione e rigenerazione + di descrittori e matrici +psblas/test: programmi di test + + +Schema di nomenclatura: +tutti i simboli (quindi routine, tipi dato, costanti, moduli etc...) +devono avere il prefisso psb_. I tipi hanno il suffisso "_type" (quindi +quello che prima era d_spmat adesso diventa psb_dspmat_type), tutti i +moduli hanno il suffisso "_mod". + + +Interfacce subroutine & argomenti: +l'articolo di Carney et al. richiede questa convenzione per l'ordine +degli argomenti: +1- arguments specifying options (tipo TRANS, UNITD etc...) +2- arguments specifying problem dimensions +3- input scalar associated with input matrices +4- description of sparse input matrices (che sarebbe i nostri FIDA e + DESCRA) +5- description of dense input matrices +6- input scalar associated with input-output matrices +7- description of input-output matrices +8- error processing informations +9- workspace +10- length of workspace + +tutto questo va rivisto nell'ottica f90 e, quindi, con i tipi dato +user-defined e con gli argomenti opzionali. +Attualmente, in linea di massima, le interfacce delle routine +algebriche hanno sempre desc_a (il descrittore) e info (il codice di +errore riportato) nelle ultime posizioni prima dei parametri +opzionali. I parametri opzionali contengono sempre gli argomenti al +punto 1 oltre che jx, jy e k che definiscono il sottopreblema su cui +effettuare l'operazione richiesta. Per tutto il resto non c'è uno +schema ben definito e viene, generalmente seguito l'ordine con cui gli +operandi appaiono nella scrittura della formula matematica relativa +all'operazione implementata nella subroutine. si potrebbe pensare a +come risistemare la cosa ma secondo me è abbastanza chiara. + + + + + + + + + diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 00000000..7934c375 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,30 @@ +include ../Make.inc + +lib: + (cd modules; make lib) + (cd comm; make lib) + (cd internals; make lib) + (cd tools; make lib) + (cd serial; make lib) + (cd psblas; make lib) + (cd prec; make lib) + (cd methd; make lib) +clean: + (cd modules; make clean) + (cd comm; make clean) + (cd internals; make clean) + (cd tools; make clean) + (cd serial; make clean) + (cd psblas; make clean) + (cd prec; make clean) + (cd methd; make clean) + +veryclean: + (cd modules; make veryclean) + (cd comm; make veryclean) + (cd internals; make veryclean) + (cd tools; make veryclean) + (cd serial; make veryclean) + (cd psblas; make veryclean) + (cd prec; make veryclean) + (cd methd; make veryclean) diff --git a/src/comm/Makefile b/src/comm/Makefile new file mode 100644 index 00000000..5d87db27 --- /dev/null +++ b/src/comm/Makefile @@ -0,0 +1,17 @@ +include ../../Make.inc + +OBJS = psb_dgather.o psb_dhalo.o psb_dovrl.o \ + psb_ihalo.o + +MPFOBJS = psb_dscatter.o +INCDIRS = ../../lib + +lib: mpfobjs $(OBJS) + + +mpfobjs: + (make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)") + + +clean: + /bin/rm -f $(MPFOBJS) $(OBJS) diff --git a/src/comm/psb_dgather.f90 b/src/comm/psb_dgather.f90 new file mode 100644 index 00000000..52000b52 --- /dev/null +++ b/src/comm/psb_dgather.f90 @@ -0,0 +1,321 @@ +! File: psb_dgather.f90 +! +! Subroutine: psb_dgatherm +! This subroutine gathers pieces of a distributed dense matrix into a local one. +! +! Parameters: +! globx - real,dimension(:,:). The local matrix into which gather the distributed pieces. +! locx - real,dimension(:,:). The local piece of the ditributed matrix to be gathered. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iroot - integer. The process that has to own the global matrix. If -1 all +! the processes will have a copy. +! iiglobx - integer(optional). The starting row of the global matrix. +! ijglobx - integer(optional). The starting column of the global matrix. +! iilocx - integer(optional). The starting row of the local piece of matrix. +! ijlocx - integer(optional). The starting column of the local piece of matrix. +! ik - integer(optional). The number of columns to gather. +! +subroutine psb_dgatherm(globx, locx, desc_a, info, iroot,& + & iiglobx, ijglobx, iilocx,ijlocx,ik) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: locx(:,:) + real(kind(1.d0)), intent(out) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot, iiglobx, ijglobx, iilocx, ijlocx, ik + + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, n, iix, jjx, temp(2), root, iiroot, ilocx, iglobx, jlocx,& + & jglobx, lda_locx, lda_globx, m, lock, globk, maxk, k, jlx, ilx, i, j, idx + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dgatherm' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (present(iroot)) then + root = iroot + if((root.lt.-1).or.(root.gt.nprow)) then + info=30 + int_err(1:2)=(/5,root/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + else + root = -1 + end if + if (root==-1) then + iiroot=0 + endif + + if (present(iiglobx)) then + iglobx = iiglobx + else + iglobx = 1 + end if + + if (present(ijglobx)) then + jglobx = ijglobx + else + jglobx = 1 + end if + + if (present(iilocx)) then + ilocx = iilocx + else + ilocx = 1 + end if + + if (present(ijlocx)) then + jlocx = ijlocx + else + jlocx = 1 + end if + + lda_globx = size(globx,1) + lda_locx = size(locx, 1) + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + + lock=size(locx,2)-jlocx+1 + globk=size(globx,2)-jglobx+1 + maxk=min(lock,globk) + + if(present(ik)) then + if(ik.gt.maxk) then + k=maxk + else + k=ik + end if + else + k = maxk + end if + + if (myrow == iiroot) then + call igebs2d(icontxt, 'all', ' ', 1, 1, k, 1) + else + call igebr2d(icontxt, 'all', ' ', 1, 1, k, 1, iiroot, 0) + end if + + ! there should be a global check on k here!!! + + call psb_chkglobvect(m,n,size(globx,1),iglobx,jglobx,desc_a%matrix_data,info) + call psb_chkvect(m,n,size(locx,1),ilocx,jlocx,desc_a%matrix_data,info,ilx,jlx) + if(info.ne.0) then + info=4010 + ch_err='psb_chk(glob)vect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((ilx.ne.1).or.(iglobx.ne.1)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + globx(:,:)=0.d0 + + do j=1,k + do i=1,desc_a%matrix_data(psb_n_row_) + idx = desc_a%loc_to_glob(i) + globx(idx,jglobx+j-1) = locx(i,jlx+j-1) + end do + ! adjust overlapped elements + i=0 + do while (desc_a%ovrlap_elem(i).ne.-1) + idx=desc_a%ovrlap_elem(i+psb_ovrlp_elem_) + idx=desc_a%loc_to_glob(idx) + globx(idx,jglobx+j-1) = globx(idx,jglobx+j-1)/desc_a%ovrlap_elem(i+psb_n_dom_ovr_) + i=i+2 + end do + end do + + call dgsum2d(icontxt,'a',' ',m,k,globx(1,jglobx),size(globx,1),root,mycol) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dgatherm + + + + + + +! Subroutine: psb_dgatherv +! This subroutine gathers pieces of a distributed dense vector into a local one. +! +! Parameters: +! globx - real,dimension(:). The local vector into which gather the distributed pieces. +! locx - real,dimension(:). The local piece of the ditributed vector to be gathered. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iroot - integer. The process that has to own the global vector. If -1 all +! the processes will have a copy. +! iiglobx - integer(optional). The starting row of the global vector. +! iilocx - integer(optional). The starting row of the local piece of vector. +! +subroutine psb_dgatherv(globx, locx, desc_a, info, iroot,& + & iiglobx, iilocx) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: locx(:) + real(kind(1.d0)), intent(out) :: globx(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot, iiglobx, iilocx + + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, n, iix, jjx, temp(2), root, iiroot, ilocx, iglobx, jlocx,& + & jglobx, lda_locx, lda_globx, lock, maxk, globk, m, k, jlx, ilx, i, j, idx + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dgatherv' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (present(iroot)) then + root = iroot + if((root.lt.-1).or.(root.gt.nprow)) then + info=30 + int_err(1:2)=(/5,root/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + else + root = -1 + end if + if (root==-1) then + iiroot=0 + endif + + jglobx=1 + if (present(iiglobx)) then + iglobx = iiglobx + else + iglobx = 1 + end if + + jlocx=1 + if (present(iilocx)) then + ilocx = iilocx + else + ilocx = 1 + end if + + lda_globx = size(globx) + lda_locx = size(locx) + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + + k = 1 + + if (myrow == iiroot) then + call igebs2d(icontxt, 'all', ' ', 1, 1, k, 1) + else + call igebr2d(icontxt, 'all', ' ', 1, 1, k, 1, iiroot, 0) + end if + + ! there should be a global check on k here!!! + + call psb_chkglobvect(m,n,size(globx),iglobx,jglobx,desc_a%matrix_data,info) + call psb_chkvect(m,n,size(locx),ilocx,jlocx,desc_a%matrix_data,info,ilx,jlx) + if(info.ne.0) then + info=4010 + ch_err='psb_chk(glob)vect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((ilx.ne.1).or.(iglobx.ne.1)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + globx(:)=0.d0 + + do i=1,desc_a%matrix_data(psb_n_row_) + idx = desc_a%loc_to_glob(i) + globx(idx) = locx(i) + end do + ! adjust overlapped elements + i=0 + do while (desc_a%ovrlap_elem(i).ne.-1) + idx=desc_a%ovrlap_elem(i+psb_ovrlp_elem_) + idx=desc_a%loc_to_glob(idx) + globx(idx) = globx(idx)/desc_a%ovrlap_elem(i+psb_n_dom_ovr_) + i=i+2 + end do + + call dgsum2d(icontxt,'a',' ',m,k,globx,size(globx),root,mycol) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dgatherv diff --git a/src/comm/psb_dhalo.f90 b/src/comm/psb_dhalo.f90 new file mode 100644 index 00000000..e42ef05c --- /dev/null +++ b/src/comm/psb_dhalo.f90 @@ -0,0 +1,323 @@ +! File: psb_dhalo.f90 +! +! Subroutine: psb_dhalom +! This subroutine performs the exchange of the halo elements in a distributed dense matrix between all the processes. +! +! Parameters: +! x - real,dimension(:,:). The local part of the dense matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! alpha - real(optional). ???. +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - real(optional). A working area. +! tran - character(optional). ???. +! mode - integer(optional). +! +subroutine psb_dhalom(x,desc_a,info,alpha,jx,ik,work,tran,mode) + use psb_descriptor_type + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(inout), target :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + real(kind(1.d0)), intent(inout), target, optional :: work(:) + integer, intent(in), optional :: mode,jx,ik + character, intent(in), optional :: tran + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, k, maxk, nrow, imode, i,& + & err, liwork, ncol + real(kind(1.d0)),pointer :: iwork(:), xp(:,:) + character :: ltran + character(len=20) :: name, ch_err + + name='psb_dhalom' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + + maxk=size(x,2)-jx+1 + + if(present(ik)) then + if(ik.gt.maxk) then + k=maxk + else + k=ik + end if + else + k = maxk + end if + + if (present(tran)) then + ltran = tran + else + ltran = 'N' + endif + if (present(mode)) then + imode = mode + else + imode = IOR(psb_swap_send_,psb_swap_recv_) + endif + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + if(present(alpha)) then + if(alpha.ne.1.d0) then + do i=0, k-1 + call dscal(nrow,alpha,x(1,jjx+i),1) + end do + end if + end if + + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psrealloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! exchange halo elements + xp => x(iix:size(x,1),jjx:jjx+k-1) + if(ltran.eq.'N') then + call psi_swapdata(imode,k,0.d0,xp,& + & desc_a,iwork,info) +!!$ call PSI_dSwapData(imode,k,0.d0,x(1,jjx),& +!!$ & size(x,1),desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + else if((ltran.eq.'T').or.(ltran.eq.'H')) then + call spi_swaptran(imode,k,1.d0,xp,& + &desc_a,iwork,info) +!!$ call PSI_dSwapTran(imode,k,1.d0,x(1,jjx),& +!!$ & size(x,1),desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_dSwap...') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dhalom + + + + + +! Subroutine: psb_dhalov +! This subroutine performs the exchange of the halo elements in a distributed dense vector between all the processes. +! +! Parameters: +! x - real,dimension(:). The local part of the dense vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! alpha - real(optional). ???. +! work - real(optional). A working area. +! tran - character(optional). ???. +! mode - integer(optional). +! +subroutine psb_dhalov(x,desc_a,info,alpha,work,tran,mode) + use psb_descriptor_type + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + real(kind(1.d0)), intent(inout), target, optional :: work(:) + integer, intent(in), optional :: mode + character, intent(in), optional :: tran + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, k, maxk, nrow, imode, i,& + & err, liwork, ncol + real(kind(1.d0)),pointer :: iwork(:) + character :: ltran + character(len=20) :: name, ch_err + + name='psb_dhalom' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + ijx = 1 + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + + if (present(tran)) then + ltran = tran + else + ltran = 'N' + endif + if (present(mode)) then + imode = mode + else + imode = IOR(psb_swap_send_,psb_swap_recv_) + endif + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + if(present(alpha)) then + if(alpha.ne.1.d0) then + call dscal(nrow,alpha,x,1) + end if + end if + + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psrealloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! exchange halo elements + if(ltran.eq.'N') then + call psi_swapdata(imode,0.d0,x(iix:size(x)),& + & desc_a,iwork,info) + else if((ltran.eq.'T').or.(ltran.eq.'H')) then + call psi_swaptran(imode,1.d0,x(iix:size(x)),& + & desc_a,iwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_dSwap...') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dhalov + + + diff --git a/src/comm/psb_dovrl.f90 b/src/comm/psb_dovrl.f90 new file mode 100644 index 00000000..8068cf04 --- /dev/null +++ b/src/comm/psb_dovrl.f90 @@ -0,0 +1,358 @@ +! File: psb_dovrl.f90 +! +! Subroutine: psb_dovrlm +! This subroutine performs the exchange of the overlap elements in a distributed dense matrix between all the processes. +! +! Parameters: +! x - real,dimension(:,:). The local part of the dense matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - real(optional). A working area. +! choice - logical(optional). ???. +! update_type - integer(optional). ???. +! +subroutine psb_dovrlm(x,desc_a,info,jx,ik,work,choice,update_type) + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(inout) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional, target :: work(:) + logical, intent(in), optional :: choice + integer, intent(in), optional :: update_type,jx,ik + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, nrow, ncol, k, maxk, iupdate,& + & imode, err, liwork, i + real(kind(1.d0)),pointer :: iwork(:) + logical :: ichoice + character(len=20) :: name, ch_err + + name='psb_dovrlm' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + maxk=size(x,2)-jx+1 + + if(present(ik)) then + if(ik.gt.maxk) then + k=maxk + else + k=ik + end if + else + k = maxk + end if + + if (present(choice)) then + ichoice = choice + else + ichoice = .true. + endif + if (present(update_type)) then + iupdate = update_type + else + iupdate = psb_none_ + endif + + imode = IOR(psb_swap_send_,psb_swap_recv_) + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! check for presence/size of a work area + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psrealloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! exchange overlap elements + if(ichoice) then + call PSI_dSwapData(imode,k,1.d0,x(1,jjx),& + & size(x,1),desc_a%matrix_data,& + & desc_a%halo_index,iwork,liwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_dSwapData') + goto 9999 + end if + + i=0 + ! switch on update type + select case (iupdate) + case(psb_square_root_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_),:) =& + & x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_),:)/& + & sqrt(real(desc_a%ovrlap_elem(i+psb_n_dom_ovr_))) + i = i+2 + end do + case(psb_avg_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_),:) =& + & x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_),:)/& + & real(desc_a%ovrlap_elem(i+psb_n_dom_ovr_)) + i = i+2 + end do + case(psb_sum_) + ! do nothing + case default + ! wrong value for choice argument + info = 70 + int_err=(/10,iupdate,0,0,0/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end select + + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dovrlm + + + + + + +! Subroutine: psb_dovrlv +! This subroutine performs the exchange of the overlap elements in a distributed dense vector between all the processes. +! +! Parameters: +! x - real,dimension(:). The local part of the dense vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! work - real(optional). A working area. +! choice - logical(optional). ???. +! update_type - integer(optional). ???. +! +subroutine psb_dovrlv(x,desc_a,info,work,choice,update_type) + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional, target :: work(:) + logical, intent(in), optional :: choice + integer, intent(in), optional :: update_type + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, nrow, ncol, k, maxk, iupdate,& + & imode, err, liwork, i + real(kind(1.d0)),pointer :: iwork(:) + logical :: ichoice + character(len=20) :: name, ch_err + + name='psb_dovrlv' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + ijx = 1 + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + k = 1 + + if (present(choice)) then + ichoice = choice + else + ichoice = .true. + endif + if (present(update_type)) then + iupdate = update_type + else + iupdate = psb_none_ + endif + + imode = IOR(psb_swap_send_,psb_swap_recv_) + + ! check vector correctness + call psb_chkvect(m,1,x,1,ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! check for presence/size of a work area + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psrealloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! exchange overlap elements + if(ichoice) then + call PSI_dSwapData(imode,k,1.d0,x,& + & x,desc_a%matrix_data,& + & desc_a%halo_index,iwork,liwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_dSwapData') + goto 9999 + end if + + i=0 + ! switch on update type + select case (iupdate) + case(psb_square_root_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_)) =& + & x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_))/& + & sqrt(real(desc_a%ovrlap_elem(i+psb_n_dom_ovr_))) + i = i+2 + end do + case(psb_avg_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_)) =& + & x(desc_a%ovrlap_elem(i+psb_ovrlp_elem_))/& + & real(desc_a%ovrlap_elem(i+psb_n_dom_ovr_)) + i = i+2 + end do + case(psb_sum_) + ! do nothing + case default + ! wrong value for choice argument + info = 70 + int_err=(/10,iupdate,0,0,0/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end select + + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dovrlv diff --git a/src/comm/psb_dscatter.f90 b/src/comm/psb_dscatter.f90 new file mode 100644 index 00000000..045b2c2d --- /dev/null +++ b/src/comm/psb_dscatter.f90 @@ -0,0 +1,387 @@ +! File: psb_dscatter.f90 +! +! Subroutine: psb_dscatterm +! This subroutine scatters a global matrix locally owned by one process +! into pieces that are local to alle the processes. +! +! Parameters: +! globx - real,dimension(:,:). The global matrix to scatter. +! locx - real,dimension(:,:). The local piece of the ditributed matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iroot - integer(optional). The process that owns the global matrix. If -1 all +! the processes have a copy. +! iiglobx - integer(optional). The starting row of the global matrix. +! ijglobx - integer(optional). The starting column of the global matrix. +! iilocx - integer(optional). The starting row of the local piece of matrix. +! ijlocx - integer(optional). The starting column of the local piece of matrix. +! ik - integer(optional). The number of columns to gather. +! +subroutine psb_dscatterm(globx, locx, desc_a, info, iroot,& + & iiglobx, ijglobx, iilocx,ijlocx,ik) + + use psb_descriptor_type + use psb_error_mod + implicit none + include 'mpif.h' + + real(kind(1.d0)), intent(out) :: locx(:,:) + real(kind(1.d0)), intent(in) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot,iiglobx,& + & ijglobx,iilocx,ijlocx,ik + + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), i, j, idx, nrow, iiroot, iglobx, jglobx,& + & ilocx, jlocx, lda_locx, lda_globx, lock, globk, icomm, k, maxk, root, ilx,& + & jlx, myrank, rootrank, c, pos + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: scatterv(:) + integer, pointer :: displ(:), l_t_g_all(:), all_dim(:) + integer :: blacs_pnum + character(len=20) :: name, ch_err + + name='psb_scatterm' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (present(iroot)) then + root = iroot + if((root.lt.-1).or.(root.gt.nprow)) then + info=30 + int_err(1:2)=(/5,root/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + else + root = -1 + end if + if (root==-1) then + iiroot=0 + endif + + if (present(iiglobx)) then + iglobx = iiglobx + else + iglobx = 1 + end if + + if (present(ijglobx)) then + jglobx = ijglobx + else + jglobx = 1 + end if + + if (present(iilocx)) then + ilocx = iilocx + else + ilocx = 1 + end if + + if (present(ijlocx)) then + jlocx = ijlocx + else + jlocx = 1 + end if + + lda_globx = size(globx,1) + lda_locx = size(locx, 1) + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + + lock=size(locx,2)-jlocx+1 + globk=size(globx,2)-jglobx+1 + maxk=min(lock,globk) + + if(present(ik)) then + if(ik.gt.maxk) then + k=maxk + else + k=ik + end if + else + k = maxk + end if + + call blacs_get(icontxt,10,icomm) + myrank = blacs_pnum(icontxt,myrow,mycol) + + lda_globx = size(globx) + lda_locx = size(locx) + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + + if (myrow == iiroot) then + call igebs2d(icontxt, 'all', ' ', 1, 1, k, 1) + else + call igebr2d(icontxt, 'all', ' ', 1, 1, k, 1, iiroot, 0) + end if + + ! there should be a global check on k here!!! + + call psb_chkglobvect(m,n,size(globx),iglobx,jglobx,desc_a%matrix_data,info) + call psb_chkvect(m,n,size(locx),ilocx,jlocx,desc_a%matrix_data,info,ilx,jlx) + if(info.ne.0) then + info=4010 + ch_err='psb_chk(glob)vect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((ilx.ne.1).or.(iglobx.ne.1)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + nrow=desc_a%matrix_data(psb_n_row_) + + if(root.eq.-1) then + ! extract my chunk + do j=1,k + do i=1, nrow + idx=desc_a%loc_to_glob(i) + locx(i,jlocx+j-1)=globx(idx,jglobx+j-1) + end do + end do + else + rootrank = blacs_pnum(icontxt,root,mycol) + end if + + ! root has to gather size information + allocate(displ(nprow),all_dim(nprow)) + call mpi_gather(nrow,1,mpi_integer,all_dim,& + & nprow,mpi_integer,rootrank,icomm,info) + + displ(1)=1 + displ(2:)=all_dim(1:nprow-1)+1 + + ! root has to gather loc_glob from each process + if(myrow.eq.root) then + allocate(l_t_g_all(sum(all_dim)),scatterv(sum(all_dim))) + end if + + call mpi_gatherv(desc_a%loc_to_glob,nrow,& + & mpi_integer,l_t_g_all,sum(all_dim),& + & displ,mpi_integer,rootrank,icomm,info) + + + do c=1, k + ! prepare vector to scatter + if(myrow.eq.root) then + do i=1,nprow + pos=displ(i) + do j=1, all_dim(i) + idx=l_t_g_all(pos+j-1) + scatterv(pos+j-1)=globx(idx,jglobx+c-1) + end do + end do + end if + + ! scatter !!! + call mpi_scatterv(scatterv,sum(all_dim),displ,& + & mpi_double_precision,locx(1,jlocx+c-1),nrow,& + & mpi_double_precision,rootrank,icomm,info) + + end do + + deallocate(all_dim, l_t_g_all, displ, scatterv) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscatterm + + + + + +! Subroutine: psb_dscatterv +! This subroutine scatters a global vector locally owned by one process +! into pieces that are local to alle the processes. +! +! Parameters: +! globx - real,dimension(:). The global vector to scatter. +! locx - real,dimension(:). The local piece of the ditributed vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iroot - integer(optional). The process that owns the global vector. If -1 all +! the processes have a copy. +! +subroutine psb_dscatterv(globx, locx, desc_a, info, iroot) + use psb_descriptor_type + use psb_error_mod + implicit none + include 'mpif.h' + + real(kind(1.d0)), intent(out) :: locx(:) + real(kind(1.d0)), intent(in) :: globx(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot + + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), i, j, idx, nrow, iiroot, iglobx, jglobx,& + & ilocx, jlocx, lda_locx, lda_globx, lock, globk, root, k, maxk, icomm, myrank,& + & rootrank, c, pos, ilx, jlx + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: scatterv(:) + integer, pointer :: displ(:), l_t_g_all(:), all_dim(:) + integer :: blacs_pnum + character(len=20) :: name, ch_err + + name='psb_scatterv' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (present(iroot)) then + root = iroot + if((root.lt.-1).or.(root.gt.nprow)) then + info=30 + int_err(1:2)=(/5,root/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + else + root = -1 + end if + + call blacs_get(icontxt,10,icomm) + myrank = blacs_pnum(icontxt,myrow,mycol) + + lda_globx = size(globx) + lda_locx = size(locx) + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + + k = 1 + + if (myrow == iiroot) then + call igebs2d(icontxt, 'all', ' ', 1, 1, k, 1) + else + call igebr2d(icontxt, 'all', ' ', 1, 1, k, 1, iiroot, 0) + end if + + ! there should be a global check on k here!!! + + call psb_chkglobvect(m,n,size(globx),iglobx,jglobx,desc_a%matrix_data,info) + call psb_chkvect(m,n,size(locx),ilocx,jlocx,desc_a%matrix_data,info,ilx,jlx) + if(info.ne.0) then + info=4010 + ch_err='psb_chk(glob)vect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((ilx.ne.1).or.(iglobx.ne.1)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + nrow=desc_a%matrix_data(psb_n_row_) + + if(root.eq.-1) then + ! extract my chunk + do i=1, nrow + idx=desc_a%loc_to_glob(i) + locx(i)=globx(idx) + end do + else + rootrank = blacs_pnum(icontxt,root,mycol) + end if + + ! root has to gather size information + allocate(displ(nprow),all_dim(nprow)) + call mpi_gather(nrow,1,mpi_integer,all_dim,& + & nprow,mpi_integer,rootrank,icomm,info) + + displ(1)=1 + displ(2:)=all_dim(1:nprow-1)+1 + + ! root has to gather loc_glob from each process + if(myrow.eq.root) then + allocate(l_t_g_all(sum(all_dim)),scatterv(sum(all_dim))) + end if + + call mpi_gatherv(desc_a%loc_to_glob,nrow,& + & mpi_integer,l_t_g_all,sum(all_dim),& + & displ,mpi_integer,rootrank,icomm,info) + + ! prepare vector to scatter + if(myrow.eq.root) then + do i=1,nprow + pos=displ(i) + do j=1, all_dim(i) + idx=l_t_g_all(pos+j-1) + scatterv(pos+j-1)=globx(idx) + end do + end do + end if + + call mpi_scatterv(scatterv,sum(all_dim),displ,& + & mpi_double_precision,locx,nrow,& + & mpi_double_precision,rootrank,icomm,info) + + deallocate(all_dim, l_t_g_all, displ, scatterv) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscatterv diff --git a/src/comm/psb_ihalo.f90 b/src/comm/psb_ihalo.f90 new file mode 100644 index 00000000..ff662a5c --- /dev/null +++ b/src/comm/psb_ihalo.f90 @@ -0,0 +1,319 @@ +! File: psb_ihalo.f90 +! +! Subroutine: psb_ihalom +! This subroutine performs the exchange of the halo elements in a distributed dense matrix between all the processes. +! +! Parameters: +! x - integer,dimension(:,:). The local part of the dense matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! alpha - real(optional). ???. +! jx - integer(optional). The starting column of the global matrix. +! ik - integer(optional). The number of columns to gather. +! work - integer(optional). A working area. +! tran - character(optional). ???. +! mode - integer(optional). +! +subroutine psb_ihalom(x,desc_a,info,alpha,jx,ik,work,tran,mode) + use psb_descriptor_type + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + integer, intent(inout), target :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + integer, intent(inout), optional, target :: work(:) + integer, intent(in), optional :: mode,jx,ik + character, intent(in), optional :: tran + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, nrow, ncol, k, maxk, liwork,& + & imode, err + integer, pointer :: xp(:,:), iwork(:) + character :: ltran + character(len=20) :: name, ch_err + + name='psb_ihalom' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + + maxk=size(x,2)-jx+1 + + if(present(ik)) then + if(ik.gt.maxk) then + k=maxk + else + k=ik + end if + else + k = maxk + end if + + if (present(tran)) then + ltran = tran + else + ltran = 'N' + endif + if (present(mode)) then + imode = mode + else + imode = IOR(psb_swap_send_,psb_swap_recv_) + endif + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + + ! we should write an "iscal" +!!$ if(present(alpha)) then +!!$ if(alpha.ne.1.d0) then +!!$ do i=0, k-1 +!!$ call iscal(nrow,alpha,x(1,jjx+i),1) +!!$ end do +!!$ end if +!!$ end if + + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psrealloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + xp => x(iix:size(x,1),jjx:jjx+k-1) + ! exchange halo elements + if(ltran.eq.'N') then + call psi_swapdata(imode,k,0,xp,& + & desc_a,iwork,info) + else if((ltran.eq.'T').or.(ltran.eq.'H')) then + call psi_swaptran(imode,k,1,xp,& + & desc_a,iwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_iSwap...') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_ihalom + + + + + +! Subroutine: psb_ihalov +! This subroutine performs the exchange of the halo elements in a distributed dense matrix between all the processes. +! +! Parameters: +! x - integer,dimension(:). The local part of the dense matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! alpha - real(optional). ???. +! work - integer(optional). A working area. +! tran - character(optional). ???. +! mode - integer(optional). +! +subroutine psb_ihalov(x,desc_a,info,alpha,work,tran,mode) + use psb_descriptor_type + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + integer, intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + integer, intent(inout), optional, target :: work(:) + integer, intent(in), optional :: mode + character, intent(in), optional :: tran + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, myrow, mycol,& + & err_act, m, n, iix, jjx, temp(2), ix, ijx, nrow, ncol, k, maxk, imode,& + & err, liwork + integer,pointer :: iwork(:) + character :: ltran + character(len=20) :: name, ch_err + + name='psb_ihalov' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + ijx = 1 + + m = desc_a%matrix_data(psb_m_) + n = desc_a%matrix_data(psb_n_) + nrow = desc_a%matrix_data(psb_n_row_) + + if (present(tran)) then + ltran = tran + else + ltran = 'N' + endif + if (present(mode)) then + imode = mode + else + imode = IOR(psb_swap_send_,psb_swap_recv_) + endif + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_a%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + +!!$ if(present(alpha)) then +!!$ if(alpha.ne.1.d0) then +!!$ call dscal(nrow,alpha,x,1) +!!$ end if +!!$ end if + + liwork=ncol + if (present(work)) then + if(size(work).lt.liwork) then + call psb_realloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psrealloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! exchange halo elements + if(ltran.eq.'N') then + call psi_swapdata(imode,0,x(iix:size(x)),& + & desc_a,iwork,info) + else if((ltran.eq.'T').or.(ltran.eq.'H')) then + call psi_swaptran(imode,1,x(iix:size(x)),& + & desc_a,iwork,info) + end if + + if(info.ne.0) then + call psb_errpush(4010,name,a_err='PSI_iSwap...') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_ihalov + + + diff --git a/src/internals/Makefile b/src/internals/Makefile new file mode 100644 index 00000000..360d77b5 --- /dev/null +++ b/src/internals/Makefile @@ -0,0 +1,21 @@ +include ../../Make.inc + +FOBJS = psi_compute_size.o psi_crea_bnd_elem.o psi_crea_index.o \ + psi_crea_ovr_elem.o psi_dl_check.o \ + psi_exist_ovr_elem.o psi_gthsct.o \ + psi_list_search.o psi_sort_dl.o srtlist.o +COBJS = avltree.o + +MPFOBJS = psi_dswapdata.o psi_dswaptran.o psi_iswapdata.o \ + psi_iswaptran.o psi_extrct_dl.o psi_desc_index.o +INCDIRS = ../../lib . + +lib: mpfobjs $(FOBJS) $(COBJS) + + +mpfobjs: + (make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)") + + +clean: + /bin/rm -f $(MPFOBJS) $(FOBJS) $(COBJS) diff --git a/src/internals/avltree.c b/src/internals/avltree.c new file mode 100644 index 00000000..e3420b9d --- /dev/null +++ b/src/internals/avltree.c @@ -0,0 +1,782 @@ +/*****************************************************************/ +/* */ +/* avltree.c: balanced AVL tree search and insertion */ +/* written by: Salvatore Filippone */ +/* */ +/* Last updated: Mar 09 2004 */ +/* */ +/* Referrences: [1] D. E. Knuth */ +/* The Art of Computer Programming */ +/* Vol. 3: Sorting and Searching, sec. 6.2.3 */ +/* Addison-Wesley */ +/* */ +/* General description: */ +/* */ +/* Build and maintain a balanced binary search tree with */ +/* arbitrary keys. The user is responsible for providing */ +/* compare functions operating on the keys themselves. */ +/* Key pointers are stored into nodes that are managed */ +/* by the subroutine calls; the user should never examine */ +/* nodes directly. */ +/* The nodes for user items are allocated in batches, */ +/* and the batches are kept as a doubly linked list. */ +/* */ +/* Data types: */ +/* AVLTree: structure containing pointers to the list */ +/* of node batches and to the root of the binary tree */ +/* structure */ +/* */ +/* AVLNode: binary tree node, containing link pointers */ +/* a reserved field, and a pointer to user data */ +/* */ +/* */ +/* User callable functions: */ +/* */ +/* AVLTreePtr GetAVLTree() */ +/* Purpose: allocate a new tree; */ +/* Function value: a fresh AVL tree pointer; */ +/* returns NULL in case of a memory failure*/ +/* */ +/* */ +/* int AVLTreeReInit(AVLTreePtr Tree) */ +/* Purpose: reinitialize an existing AVL Tree, reusing */ +/* node batches already allocated. */ +/* Input: 1. Tree */ +/* A pointer to an existing tree structure */ +/* Function value: 0 Normal termination */ +/* -1 Invalid input pointer */ +/* -3 Memory allocation failure */ +/* */ +/* AVLNodePtr AVLTreeSearch(AVLTreePtr Tree, void *key, */ +/* int (*comp)(void*, void*)) */ +/* Purpose: search an existing AVL Tree for a key */ +/* Input: 1. Tree */ +/* A valid pointer to a Tree */ +/* 2. key */ +/* The item being searched for */ +/* 3. comp */ +/* A comparison function: */ +/* a comp(a,b)<0 */ +/* a==b => comp(a,b)=0 */ +/* a>b => comp(a,b)>0 */ +/* The function is always invoked as: */ +/* comp(user_key,tree_key); */ +/* */ +/* */ +/* Function value: NULL: input error or item not found */ +/* valid pointer: pointer to a node */ +/* containing the key */ +/* */ +/* int AVLTreeInsert(AVLTreePtr Tree, void *key, */ +/* int (*comp)(void*,void*), */ +/* void (*update)(void*,void*)) */ +/* */ +/* Purpose: Insert an item into an existing (possibly */ +/* empty) tree. */ +/* */ +/* Input: 1. Tree */ +/* The (existing) tree */ +/* 2. key */ +/* The (new) item to be inserted */ +/* 3. comp */ +/* comparison function (as in AVLTreeSearch) */ +/* 4. update */ +/* A user provided function to be called when */ +/* the key is already present in the tree */ +/* with the calling sequence: */ +/* update(new_key,existing_key) */ +/* enables the user to specify an arbitrary */ +/* update procedure. */ +/* */ +/* */ +/* */ +/* AVLNodePtr AVLTreeUserInsert(AVLTreePtr Tree, void *key, */ +/* int (*comp)(void*,void*)) */ +/* */ +/* Purpose: Insert an item into an existing (possibly */ +/* empty) tree; returns a pointer to a node */ +/* containing the item, even when that node */ +/* was already existing; does no update */ +/* */ +/* Input: 1. Tree */ +/* The (existing) tree */ +/* 2. key */ +/* The (new) item to be inserted */ +/* 3. comp */ +/* comparison function (as in AVLTreeSearch) */ +/* */ +/* Function value: Valid pointer: pointer to a node */ +/* containing the item (possibly */ +/* was already there) */ +/* NULL input error or memory failure */ +/* */ +/* */ +/* int HowManyKeys(AVLTreePtr Tree) */ +/* Purpose: how many keys does Tree contain? */ +/* Function value: >=0 */ +/* */ +/* */ +/* void AVLTreeInorderTraverse(AVLTreePtr Tree, */ +/* void (*func)( void*, void*), void *data) */ +/* */ +/* Purpose: visit the nodes of the binary tree in their */ +/* natural order, performing an arbitrary */ +/* task upon visit. */ +/* Input: 1. Tree */ +/* A tree pointer */ +/* 2. func */ +/* A function performing a user specified */ +/* task on each node; the fuction is invoked as */ +/* func( key,data) */ +/* where data is parm. 3 */ +/* 3. data */ +/* Auxiliary data to be passed to func upon */ +/* each visit */ +/* */ +/* int AVLTreeInorderTraverseWithDelims(AVLTreePtr Tree, */ +/* void *first, void *last, int (*comp)(void*,void*) */ +/* void (*func)( void*, void*), void *data) */ +/* */ +/* Purpose: visit the nodes of the binary tree in their */ +/* natural order, performing an arbitrary */ +/* task upon visit, but only on nodes */ +/* with their key within a specified range. */ +/* */ +/* Input: 1. Tree */ +/* A tree pointer */ +/* 2. first */ +/* Visit nodes with first <= node->key */ +/* 3. last */ +/* Visit nodes with node->key <= last */ +/* 4. comp */ +/* comparison function (as in AVLTreeSearch) */ +/* 5. func */ +/* A function performing a user specified */ +/* task on each node; the fuction is invoked as */ +/* func( key,data) */ +/* where data is parm. 3 */ +/* 6. data */ +/* Auxiliary data to be passed to func upon */ +/* each visit */ +/* Function value: total number of nodes visited (>=0) */ +/* */ +/* */ +/* */ +/* void AVLTreeFree(AVLTreePtr Tree, void (*ffree)(void*)) */ +/* Purpose: free up tree data storage */ +/* Does NOT free the Tree pointer itself, */ +/* rather all the structures that it points to */ +/* Input: 1. Tree */ +/* A tree pointer */ +/* 2. ffree */ +/* A user specified function invoked on each */ +/* key pointer contained in the tree to free */ +/* its memory (if necessary). Can be NULL. */ +/* */ +/* */ +/*****************************************************************/ + + + +#include +#include +#include +#include "avltree.h" + +#define POOLSIZE 4096 +#define MAXSTACK 64 +#define MAX(a,b) ((a)>=(b) ? (a) : (b)) + +typedef struct avltvect { + AVLNode pool[POOLSIZE]; + int avail; + AVLTVectPtr previous, next; +} AVLTVect; + + +int HowManyItems(AVLTreePtr Tree) +{ + if (Tree==NULL) { + return(0); + } else { + return(Tree->nnodes); + } +} + + +AVLTreePtr GetAVLTree() +{ + AVLTreePtr tree; + if ((tree=(AVLTreePtr) malloc(sizeof(AVLTree)))!=NULL){ + memset(tree,'\0',sizeof(AVLTree)); + AVLTreeInit(tree); + } + return(tree); +} + +int AVLTreeInit(AVLTreePtr Tree) +{ + AVLTVectPtr current; + if (Tree==NULL) { + fprintf(stderr,"Cannot initialize a NULL Tree pointer\n"); + return(-1); + } + + if (Tree->first!=NULL) { + fprintf(stderr,"Cannot initialize a nonempty Tree: call AVLTreeFree first\n"); + return(-2); + } + + if ((current=(AVLTVectPtr)malloc(sizeof(AVLTVect)))==NULL) { + fprintf(stderr,"Memory allocation failure\n"); + return(-3); + } + memset(current,'\0',sizeof(AVLTVect)); + Tree->first=Tree->current=current; + Tree->nnodes=0; + Tree->root=NULL; + return(0); +} + +int AVLTreeReInit(AVLTreePtr Tree) +{ + AVLTVectPtr current /* , next */ ; + if (Tree==NULL) { + fprintf(stderr,"Cannot ReInitialize a NULL Tree pointer\n"); + return(-1); + } + + if (Tree->first!=NULL) { + current=Tree->first; + while (current!=NULL) { + current->avail=0; + memset(current->pool,'\0',POOLSIZE*sizeof(AVLNode)); + current=current->next; + } + } else { + if ((current=(AVLTVectPtr)malloc(sizeof(AVLTVect)))==NULL) { + fprintf(stderr,"Memory allocation failure\n"); + return(-3); + } + current->avail=0; + current->previous=current->next=NULL; + Tree->first=current; + } + Tree->current=Tree->first; + Tree->nnodes=0; + Tree->root=NULL; + return(0); +} + + + + +AVLNodePtr AVLTreeSearch(AVLTreePtr Tree, void *key, + int (*comp)(void *, void *)) +{ + AVLNodePtr current; + int icmp; + if (Tree==NULL) return(NULL); + current = Tree->root; + + while (current != NULL) { + icmp = (*comp)(key,current->key); + if (icmp<0) { + current = current->llink; + } else if (icmp==0){ + return(current); + } else if (icmp>0) { + current = current->rlink; + } + } + return(current); +} + + + +void AVLTreeInorderTraverse(AVLTreePtr Tree, void (*func)(void *, void *), + void *data) +{ + int lev; + AVLNodePtr root; + + AVLNodePtr stack[MAXSTACK+2]; + int choice[MAXSTACK+2]; + root=Tree->root; + if (root == NULL) return; + + lev=0; + stack[lev] = root; + choice[lev] = -1; + + while (lev>=0) { + if (stack[lev]==NULL) { + lev--; + } else { + if (choice[lev]==-1) { + stack[lev+1] = stack[lev]->llink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } else if (choice[lev]==0) { + (*func)(stack[lev]->key,data); + stack[lev+1] = stack[lev]->rlink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } else { + lev--; + } + } + } +} + + +int AVLTreeInorderTraverseWithDelims(AVLTreePtr Tree, void *first, void *last, + int (*comp)(void*, void*), + void (*func)(void *, void *), + void *data) +{ + AVLNodePtr root, current; + int lev, nvisit, icmp; + AVLNodePtr stack[MAXSTACK+2]; + int choice[MAXSTACK+2]; + + root=Tree->root; + if (root == NULL) return(0); + + nvisit=0; + lev=0; + current = root; + while (current != NULL) { + stack[lev] = current; + icmp = (*comp)(first,current->key); + if (icmp<=0) { + choice[lev]=0; + current = current->llink; + } else if (icmp>0) { + current = current->rlink; + choice[lev]=1; + } + lev++; + } + lev--; + while (lev>=0) { + if (stack[lev]==NULL) { + lev--; + } else { + if (choice[lev]==-1) { + stack[lev+1] = stack[lev]->llink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } else if (choice[lev]==0) { + if (((*comp)(last,stack[lev]->key))<0) { + lev--; + } else { + (*func)(stack[lev]->key,data); + nvisit++; + stack[lev+1] = stack[lev]->rlink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } + } else { + lev--; + } + } + } + return(nvisit); +} + + + +void AVLTreePreorderTraverse(AVLTreePtr Tree, void (*func)(void *, void *), + void *data) +{ + AVLNodePtr root; + int lev; + AVLNodePtr stack[MAXSTACK+2]; + int choice[MAXSTACK+2]; + + root=Tree->root; + if (root == NULL) return; + lev=0; + stack[lev] = root; + choice[lev] = -1; + + while (lev>=0) { + if (stack[lev]==NULL) { + lev--; + } else { + if (choice[lev]==-1) { + (*func)(stack[lev]->key,data); + stack[lev+1] = stack[lev]->llink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } else if (choice[lev]==0) { + stack[lev+1] = stack[lev]->rlink; + choice[lev+1] = -1; + choice[lev] += 1; + lev++; + } else { + lev--; + } + } + } +} + + + +void AVLTreeFree(AVLTreePtr Tree, void (*ffree)(void *)) +{ + AVLTVectPtr current, next; + int i; + if (Tree == NULL) return; + + current=Tree->first; + + while (current != NULL) { + next=current->next; + if (*ffree != NULL) { + for (i=0; iavail; i++) + (*ffree)((current->pool[i]).key); + } + free(current); + current=next; + } + Tree->nnodes=0; + Tree->first=Tree->current=NULL; + return; +} + + +AVLNodePtr GetAVLNode(AVLTreePtr Tree) +{ + AVLTVectPtr current, new; + AVLNodePtr newnode; + + if (Tree==NULL) { + return(NULL); + } + if ((current=Tree->current)==NULL) { + return(NULL); + } + + while ((current->avail>=POOLSIZE)&&(current->next!=NULL)) + current=current->next; + + if (current->availpool[current->avail]); + current->avail += 1; + } else { + if ((new=(AVLTVectPtr)malloc(sizeof(AVLTVect)))==NULL) { + fprintf(stderr,"Memory allocation failure\n"); + return(NULL); + } + memset(new,'\0',sizeof(AVLTVect)); + newnode=&(new->pool[0]); + new->avail = 1; + current->next=new; + new->previous=current; + new->next=NULL; + Tree->current=new; + } + return(newnode); +} + +int AVLTreeInsert(AVLTreePtr Tree, void *key,int (*comp)(void *, void *), + void (*update)(void *, void *)) +{ + AVLNodePtr root, t, s, p, q, r; + int search, bal, icmp; + + if (Tree==NULL) { + fprintf(stderr,"Fatal error: null tree pointer\n"); + return(-1); + } + + if ((root = Tree->root) == NULL) { + if ((t=GetAVLNode(Tree))==NULL) { + return(-2); + } + t->key = key; + t->rlink=t->llink=NULL; + t->bal=0; + Tree->root = t; + Tree->nnodes=1; + return(0); + } + t = NULL; + s = root; + p = root; + search=1; + while (search) { + icmp = (*comp)(key,p->key); + if (icmp<0) { + if ((q=p->llink)==NULL) { + if ((q=GetAVLNode(Tree))==NULL) { + return(-2); + } + p->llink=q; + search=0; + } else { + if (q->bal != 0) { + t=p; + s=q; + } + } + } else if (icmp == 0) { + (*update)(key,p->key); + return(1); + } else { + if ((q=p->rlink)==NULL) { + if ((q=GetAVLNode(Tree))==NULL) { + return(-2); + } + p->rlink=q; + search=0; + } else { + if (q->bal != 0) { + t=p; + s=q; + } + } + } + p=q; + } + q->key=key; + q->llink=q->rlink=NULL; + q->bal=0; + Tree->nnodes += 1; + + if ((*comp)(key,s->key)<0) { + r=p=s->llink; + } else { + r=p=s->rlink; + } + + while (p!=q) { + if ((*comp)(key,p->key)<0) { + p->bal=-1; + p = p->llink; + } else { + p->bal=1; + p=p->rlink; + } + } + + if ((*comp)(key,s->key)<0) { + bal=-1; + } else { + bal=1; + } + + if (s->bal == 0) { + s->bal=bal; + return (0); + } else if (s->bal == -bal) { + s->bal=0; + return (0); + } else if (s->bal == bal) { + + if (r->bal == bal) { + /* single rotation */ + p=r; + if (bal>0) { + s->rlink=r->llink; + r->llink=s; + } else { + s->llink=r->rlink; + r->rlink=s; + } + s->bal=r->bal=0; + } else if (r->bal == -bal) { + /* double rotation */ + if (bal>0) { + p=r->llink; + r->llink=p->rlink; + p->rlink=r; + s->rlink=p->llink; + p->llink=s; + } else { + p=r->rlink; + r->rlink=p->llink; + p->llink=r; + s->llink=p->rlink; + p->rlink=s; + } + if (p->bal == bal) { + s->bal=-bal; + r->bal=0; + } else if (p->bal==0) { + s->bal=r->bal=0; + } else { + r->bal=bal; + s->bal=0; + } + p->bal=0; + } + if (t==NULL) { + root=p; + } else { + if (t->rlink==s) { + t->rlink=p; + } else { + t->llink=p; + } + } + Tree->root=root; + return(0); + } + return(0); +} + +AVLNodePtr AVLTreeUserInsert(AVLTreePtr Tree, void *key, + int (*comp)(void *, void *)) +{ + AVLNodePtr root, t, s, p, q, r; + int search, bal, icmp; + + if (Tree==NULL) { + fprintf(stderr,"Fatal error: null tree pointer\n"); + return(NULL); + } + + if ((root = Tree->root) == NULL) { + if ((t=GetAVLNode(Tree))==NULL) { + return(NULL); + } + t->key = key; + t->rlink=t->llink=NULL; + t->bal=0; + Tree->root = t; + Tree->nnodes=1; + return(t); + } + t = NULL; + s = root; + p = root; + search=1; + while (search) { + icmp = (*comp)(key,p->key); + if (icmp<0) { + if ((q=p->llink)==(AVLNodePtr) NULL) { + if ((q=GetAVLNode(Tree))==NULL) { + return(NULL); + } + p->llink=q; + search=0; + } else { + if (q->bal != 0) { + t=p; + s=q; + } + } + } else if (icmp == 0) { + return(p); + } else { + if ((q=p->rlink)==NULL) { + if ((q=GetAVLNode(Tree))==NULL) { + return(NULL); + } + p->rlink=q; + search=0; + } else { + if (q->bal != 0) { + t=p; + s=q; + } + } + } + p=q; + } + q->key=key; + q->llink=q->rlink=NULL; + q->bal=0; + Tree->nnodes += 1; + + if ((*comp)(key,s->key)<0) { + r=p=s->llink; + } else { + r=p=s->rlink; + } + + while (p!=q) { + if ((*comp)(key,p->key)<0) { + p->bal=-1; + p = p->llink; + } else { + p->bal=1; + p=p->rlink; + } + } + + if ((*comp)(key,s->key)<0) { + bal=-1; + } else { + bal=1; + } + + if (s->bal == 0) { + s->bal=bal; + return (q); + } else if (s->bal == -bal) { + s->bal=0; + return (q); + } else if (s->bal == bal) { + + if (r->bal == bal) { + /* single rotation */ + p=r; + if (bal>0) { + s->rlink=r->llink; + r->llink=s; + } else { + s->llink=r->rlink; + r->rlink=s; + } + s->bal=r->bal=0; + } else if (r->bal == -bal) { + /* double rotation */ + if (bal>0) { + p=r->llink; + r->llink=p->rlink; + p->rlink=r; + s->rlink=p->llink; + p->llink=s; + } else { + p=r->rlink; + r->rlink=p->llink; + p->llink=r; + s->llink=p->rlink; + p->rlink=s; + } + if (p->bal == bal) { + s->bal=-bal; + r->bal=0; + } else if (p->bal==0) { + s->bal=r->bal=0; + } else { + r->bal=bal; + s->bal=0; + } + p->bal=0; + } + if (t==NULL) { + root=p; + } else { + if (t->rlink==s) { + t->rlink=p; + } else { + t->llink=p; + } + } + Tree->root=root; + return(q); + } + return(q); +} + + diff --git a/src/internals/avltree.h b/src/internals/avltree.h new file mode 100644 index 00000000..260c2d5e --- /dev/null +++ b/src/internals/avltree.h @@ -0,0 +1,38 @@ +/* Type definitions for balanced AVL tree search and insertion */ +/* See avltree.c for a full definition of the subroutines */ +/* */ + +typedef struct avlnode *AVLNodePtr; +typedef struct avlnode { + AVLNodePtr llink,rlink; + int bal; + void *key; +} AVLNode; + +typedef struct avltvect *AVLTVectPtr; + +typedef struct avltree *AVLTreePtr; +typedef struct avltree { + AVLTVectPtr first, current; + AVLNodePtr root; + int nnodes; +} AVLTree; + + +AVLNodePtr AVLTreeSearch(AVLTreePtr, void *, int (*)(void *, void *)); +AVLNodePtr GetAVLNode(AVLTreePtr); +int AVLTreeInit(AVLTreePtr); +int AVLTreeReInit(AVLTreePtr); +AVLTreePtr GetAVLTree(); +int AVLTreeInsert(AVLTreePtr, void *, int (*)(void *, void *), + void (*)(void *, void *)); +AVLNodePtr AVLTreeUserInsert(AVLTreePtr, void *, int (*)(void *, void *)); +void AVLTreeInorderTraverse(AVLTreePtr, void (*)(void *, void *), void *); +void AVLTreePreorderTraverse(AVLTreePtr, void (*)(void *, void *), void *); +void AVLTreeFree(AVLTreePtr, void (*)(void *)); +int HowManyItems(AVLTreePtr); +int AVLTreeInorderTraverseWithDelims(AVLTreePtr,void*, void*, int (*)(void*,void*), + void (*)(void *, void *), void *); + + + diff --git a/src/internals/ctof_blacs.h b/src/internals/ctof_blacs.h new file mode 100644 index 00000000..e090df03 --- /dev/null +++ b/src/internals/ctof_blacs.h @@ -0,0 +1,314 @@ +/* This header file replaces every call to a BLACS routine by C interface + with the same call performed by Fortran interface */ + +#ifndef CTOF_BLACS +#define CTOF_BLACS +#endif + +/* Variables necessary for invocations where + constant arguments are used */ + +static int i1, i2, i3, i4, i5, i6, i7; + +/* Support routines: + Initialization */ + +#define Cblacs_pinfo(mypnum, nprocs) \ + blacs_pinfo_(mypnum, nprocs) +#define Cblacs_setup(mypnum, nprocs) \ + blacs_setup_(mypnum, nprocs) +#define Cblacs_get(icontxt, what, val) \ + {i1 = icontxt; i2 = what; \ + blacs_get_(&i1, &i2,val);} +#define Cblacs_set(icontxt, what, val) \ + {i1 = icontxt; i2 = what; \ + blacs_set_(&i1, &i2, &val);} +#define Cblacs_gridinit(icontxt, order, nprow, npcol) \ + {i1 = nprow; i2 = npcol; \ + blacs_gridinit_(icontxt, order, &i1, &i2);} +#define Cblacs_gridmap(icontxt, pmap, ldpmap, nprow, npcol) \ + {i1 = ldpmap; i2 = nprow; i3 = npcol; \ + blacs_gridmap_(icontxt, pmap, &i1, &i2, &i3);} + +/* Support routines: + Destruction */ + +#define Cblacs_freebuff(icontxt, wait) \ + {i1 = icontxt; i2 = wait; \ + blacs_freebuff_(&i1, &i2);} +#define Cblacs_gridexit(icontxt) \ + {i1 = icontxt; \ + blacs_gridexit_(&i1);} +#define Cblacs_abort(icontxt, errornum) \ + {i1 = icontxt; i2 = errornum; \ + blacs_abort_(&i1, &i2);} +#define Cblacs_exit(doneflag) \ + {i1 = doneflag; \ + blacs_exit_(&i1);} + +/* Support routines: + Informational and Miscellaneous */ + +#define Cblacs_gridinfo(icontxt,nprow,npcol,myprow,mypcol) \ + {i1 = icontxt; \ + blacs_gridinfo_(&i1, nprow, npcol, myprow, mypcol);} +#define Cblacs_pnum(icontxt, prow, pcol) \ + {i1 = icontxt; i2 = prow; i3 = pcol; \ + blacs_pnum_(&i1, &i2, &i3);} +#define Cblacs_pcoord(icontxt, pnum, prow, pcol) \ + {i1 = icontxt; i2 = pnum; \ + blacs_pcoord_(&i1, &i2, prow, pcol);} +#define Cblacs_barrier(icontxt, scope) \ + {i1 = icontxt; \ + blacs_barrier_(&i1, scope);} + +/* Support routines: + Unofficial */ + +#define Csetpvmtids(ntasks, tids) \ + {i1 = ntasks; \ + setpvmtids_(&i1, tids);} +#define Cdcputime() \ + dcputime_() +#define Cdwalltime() \ + dwalltime_() +#define Cksendid(icontxt, rdest, cdest) \ + {i1 = icontxt; i2 = rdest; i3 = cdest; \ + ksendid_(&i1, &i2, &i3);} +#define Ckrecvid(icontxt, rsrc, csrc) \ + {i1 = icontxt; i2 = rsrc; i3 = csrc; \ + krecvid_(&i1, &i2, &i3);} +#define Ckbsid(icontxt, scope) \ + {i1 = icontxt; \ + kbsid_(&i1, scope);} +#define Ckbrid(icontxt, scope, rsrc, csrc) \ + {i1 = icontxt; i2 = rsrc; i3 = csrc; \ + kbrid_(&i1, scope, &i2, &i3);} + +/* Point to Point : + Integer */ + +#define Cigesd2d(icontxt, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + igesd2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cigerv2d(icontxt, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + igerv2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Citrsd2d(icontxt, uplo, diag, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + itrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} +#define Citrrv2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + itrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Point to Point : + Single precision real */ + +#define Csgesd2d(icontxt, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + sgesd2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Csgerv2d(icontxt, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + sgerv2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cstrsd2d(icontxt, uplo, diag, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + strsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} +#define Cstrrv2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + strsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Point to Point : + Double precision real */ + +#define Cdgesd2d(icontxt, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + dgesd2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cdgerv2d(icontxt, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + dgerv2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cdtrsd2d(icontxt, uplo, diag, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + dtrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} +#define Cdtrrv2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + dtrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Point to Point : + Single precision complex */ + +#define Ccgesd2d(icontxt, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + cgesd2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Ccgerv2d(icontxt, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + cgerv2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cctrsd2d(icontxt, uplo, diag, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + ctrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} +#define Cctrrv2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + ctrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Point to Point : + Double precision complex */ + +#define Czgesd2d(icontxt, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + zgesd2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Czgerv2d(icontxt, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + zgerv2d_(&i1, &i2, &i3, A, &i4, &i5, &i6);} +#define Cztrsd2d(icontxt, uplo, diag, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + ztrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} +#define Cztrrv2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + ztrsd2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Broadcasts : + Integer */ + +#define Cigebs2d(icontxt, scope, top, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + igebs2d_(&i1, scope, top, &i2, &i3, A, &i4);} +#define Cigebr2d(icontxt, scope, top, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + igebr2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Citrbs2d(icontxt, scope, top, uplo, diag, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + itrbs2d_(&i1, scope, top, uplo, diag, &i2, &i3, A, &i4);} +#define Citrbr2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + igebr2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Broadcasts : + Single precision real */ + +#define Csgebs2d(icontxt, scope, top, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + sgebs2d_(&i1, scope, top, &i2, &i3, A, &i4);} +#define Csgebr2d(icontxt, scope, top, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + sgebr2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cstrbs2d(icontxt, scope, top, uplo, diag, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + strbs2d_(&i1, scope, top, uplo, diag, &i2, &i3, A, &i4);} +#define Cstrbr2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + sgebr2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Broadcasts : + Double precision real */ + +#define Cdgebs2d(icontxt, scope, top, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + dgebs2d_(&i1, scope, top, &i2, &i3, A, &i4);} +#define Cdgebr2d(icontxt, scope, top, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + dgebr2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cdtrbs2d(icontxt, scope, top, uplo, diag, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + dtrbs2d_(&i1, scope, top, uplo, diag, &i2, &i3, A, &i4);} +#define Cdtrbr2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + dgebr2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Broadcasts : + Single precision complex */ + +#define Ccgebs2d(icontxt, scope, top, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + cgebs2d_(&i1, scope, top, &i2, &i3, A, &i4);} +#define Ccgebr2d(icontxt, scope, top, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + cgebr2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cctrbs2d(icontxt, scope, top, uplo, diag, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + ctrbs2d_(&i1, scope, top, uplo, diag, &i2, &i3, A, &i4);} +#define Cctrbr2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + cgebr2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Broadcasts : + Double precision complex */ + +#define Czgebs2d(icontxt, scope, top, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + zgebs2d_(&i1, scope, top, &i2, &i3, A, &i4);} +#define Czgebr2d(icontxt, scope, top, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + zgebr2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cztrbs2d(icontxt, scope, top, uplo, diag, m, n, A, lda) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; \ + ztrbs2d_(&i1, scope, top, uplo, diag, &i2, &i3, A, &i4);} +#define Cztrbr2d(icontxt, uplo, diag, m, n, A, lda, rsrc, csrc) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rsrc; i6 = csrc; \ + zgebr2d_(&i1, uplo, diag, &i2, &i3, A, &i4, &i5, &i6);} + +/* Combines: + Integer */ + +#define Cigsum2d(icontxt, scope, top, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + igsum2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cigamx2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + igamx2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} +#define Cigamn2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + igamn2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} + +/* Combines: + Single precision real */ + +#define Csgsum2d(icontxt, scope, top, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + sgsum2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Csgamx2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + sgamx2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} +#define Csgamn2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + sgamn2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} + +/* Combines: + Double precision real */ + +#define Cdgsum2d(icontxt, scope, top, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + dgsum2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Cdgamx2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + dgamx2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} +#define Cdgamn2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + dgamn2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} + +/* Combines: + Single precision complex */ + +#define Ccgsum2d(icontxt, scope, top, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + cgsum2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Ccgamx2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + cgamx2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} +#define Ccgamn2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + cgamn2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} + +/* Combines: + Double precision complex */ + +#define Czgsum2d(icontxt, scope, top, m, n, A, lda, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = rdest; i6 = cdest; \ + zgsum2d_(&i1, scope, top, &i2, &i3, A, &i4, &i5, &i6);} +#define Czgamx2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + zgamx2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} +#define Czgamn2d(icontxt, scope, top, m, n, A, lda, RA, CA, RCflag, rdest, cdest) \ + {i1 = icontxt; i2 = m; i3 = n; i4 = lda; i5 = RCflag; i6 = rdest; i7 = cdest; \ + zgamn2d_(&i1, scope, top, &i2, &i3, A, &i4, RA, CA, &i5, &i6, &i7);} + + + diff --git a/src/internals/psblas.h b/src/internals/psblas.h new file mode 100644 index 00000000..60d4ab60 --- /dev/null +++ b/src/internals/psblas.h @@ -0,0 +1,292 @@ +/* --------------------------------------------------------------------- +* +* -- PSBLAS routine (version 1.0) -- +* +* --------------------------------------------------------------------- +*/ + +/* +* This file includes the standard C libraries, as well as system +* dependent include files. All PSBLAS routines include this file. +*/ +#include + +#ifndef PSBLASH +#define PSBLASH +/* +* ======================================================================== +* Machine Specific PBLAS macros +* ======================================================================== +*/ +/* This is a debugging option. + #define PS_CONTROL_LEVEL */ + +#define _HAL_ 0 +#define _T3D_ 1 + +#ifdef T3D +#define _MACH_ _T3D_ +#endif + +#ifndef _MACH_ +#define _MACH_ _HAL_ +#endif + +/* +* ======================================================================== +* Include files +* ======================================================================== +*/ +#include +#include +#include + +#if( _MACH_ == _T3D_ ) +#include +#endif + +#ifdef USE_FBLACS +#ifndef CTOF_BLACS +#include "ctof_blacs.h" +#endif +#endif + + + +/* +* ======================================================================== +* FORTRAN <-> C interface +* ======================================================================== +* +* These macros define how the PBLAS will be called. _F2C_ADD_ assumes +* that they will be called by FORTRAN, which expects C routines to have +* an underscore postfixed to the name (Suns, and Intel machines expect +* this). _F2C_NOCHANGE indicates that FORTRAN will be calling, and that +* it expects the name called by FORTRAN to be identical to that compiled +* by the C (RS6K's do this). _F2C_UPCASE says it expects C routines +* called by FORTRAN to be in all upcase (CRAY wants this). +*/ + +#define _F2C_ADD_ 0 +#define _F2C_NOCHANGE 1 +#define _F2C_UPCASE 2 + +#ifdef UpCase +#define _F2C_CALL_ _F2C_UPCASE +#endif + +#ifdef NoChange +#define _F2C_CALL_ _F2C_NOCHANGE +#endif + +#ifdef Add_ +#define _F2C_CALL_ _F2C_ADD_ +#endif + +#ifndef _F2C_CALL_ +#define _F2C_CALL_ _F2C_ADD_ +#endif + +/* +* ======================================================================== +* TYPE DEFINITIONS AND CONVERSION UTILITIES +* ======================================================================== +*/ + +typedef struct { float re, im; } complex; +typedef struct { double re, im; } complex16; + +#if( _MACH_ == _T3D_ ) + /* Type of character argument in a FORTRAN call */ +#define F_CHAR _fcd + /* Character conversion utilities */ +#define F2C_CHAR(a) ( _fcdtocp( (a) ) ) +#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) + /* Type of FORTRAN functions */ +#define F_VOID_FCT void fortran /* Subroutine */ +#define F_INTG_FCT int fortran /* INTEGER function */ +#define F_DBLE_FCT double fortran /* DOUBLE PRECISION function */ + +#else + /* Type of character argument in a FORTRAN call */ +typedef char * F_CHAR; + /* Character conversion utilities */ +#define F2C_CHAR(a) (a) +#define C2F_CHAR(a) (a) + /* Type of FORTRAN functions */ +#define F_VOID_FCT void /* Subroutine */ +#define F_INTG_FCT int /* INTEGER function */ +#define F_DBLE_FCT double /* DOUBLE PRECISION function */ + +#endif + +/* +* ====================================================================== +* FUNCTIONS PROTOTYPES +* ====================================================================== +*/ +void DVSct(int n, int k,int idx[],int flag, double X[], int lx, + double beta, double Y[], int ly); +void DVGth(int n, int k,int idx[],int flag, double X[], int lx,double Y[], int ly); +void IVSct(int n, int k,int idx[],int flag, int X[], int lx, + int beta, int Y[], int ly); +void IVGth(int n, int k,int idx[],int flag, int X[], int lx,int Y[], int ly); + +void PSI_dSwapData(int iflag, int n, double beta, double Y[], int ly, + int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_dSwapTran(int flag, int n, double beta, double Y[], int ly, + int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_zSwapData(int n, double Y[], int ly, int desc_data[], int desc_halo[], + double *work, int *lwork, int *ierror); + +void PSI_zSwapOverlap(double Y[], double Sum_Ovrlap[], int desc_data[], + int desc_ovrlap[], double work[], int *lwork, int *ierror); +void PSI_iSwapData(int iflag, int n, int beta, int Y[], int ly, + int desc_data[], int desc_halo[], + int *work, int *lwork, int *ierror); + +void PSI_iSwapTran(int flag, int n, int beta, int Y[], int ly, + int desc_data[], int desc_halo[], + int *work, int *lwork, int *ierror); + +/* +* ======================================================================== +* #DEFINE MACRO CONSTANTS +* ======================================================================== +*/ +/* MACRO max */ +#define max(x,y) ((x)>(y)?(x):(y)) + +/*MACRO for ovrlap update*/ +#define NOHALO_ 0 +#define HALO_ 4 +#define NONE_ 0 +#define SUM_ 1 +#define AVG_ 2 +#define SQUARE_ROOT_ 3 + +/* Bit fields to control swapdata/ovrlap behaviour. + BEWARE: check consistency with tools_const.f. + Should it be automated? */ +#define SWAP_SEND 1 +#define SWAP_RECV 2 +#define SWAP_SYNC 4 +#define SWAP_MPI 8 + + +/* Macro for MATRIX_DATA array */ +#define DEC_TYPE_ 0 /* The type of decomposition of global + matrix A. */ +#define M_ 1 /* Number of equations */ +#define N_ 2 /* Number of variables */ +#define N_ROW_ 3 /* The number of row of local matrix. */ +#define N_COL_ 4 /* The number of columns of local + matrix. */ +#define CTXT_ 5 /* The BLACS context handle, indicating + the global context of the operation + on the matrix. + The context itself is global. */ +#define LOC_TO_GLOB_ 6 /* The pointer to the array + loc_to_glob */ +#define MPI_C_ 8 /* The MPI Fortran handle */ +/* values for DEC_TYPE_ */ +#define DESC_ASB 3099 +#define DESC_BLD (DESC_ASB+1) + +/* Macro for HALO array */ +#define PROC_ID_ 0 /* The identifier of domain. */ +#define N_ELEM_RECV_ 1 /* The number of elements to receive*/ +#define ELEM_RECV_ 2 /* The first index of local elements */ +#define N_ELEM_SEND_ 2 /* The number of elements to send */ +#define ELEM_SEND_ 3 /* The first index of local elements */ + +/* Macro for OVERLAP array */ +#define N_OVRLP_ELEM_ 1 /* The number of overlap elements to recv/send */ +#define OVRLP_ELEM_TO_ 2 /* The first index of local elements */ + +/* Macro for OVR_ELEM_D array */ +#define OVRLP_ELEM_ 0 +#define N_DOM_OVR_ 1 + +#define BROADCAST "B" /* Blacs operation definitions */ +#define COMBINE "C" + +#define ALL "A" /* Scope definitions */ +#define COLUMN "C" +#define ROW "R" + +#define TOPDEF " " /* Default BLACS topology, PB-BLAS routines */ +#define CTOPDEF ' ' +#define TOPGET "!" + +#define YES "Y" +#define NO "N" + +#define MULLENFAC 2 + +#define ONE 1.0 +#define ZERO 0.0 + +/* Integer values for error checking */ +#define no_err 0 +#define act_ret 0 +#define act_abort 1 + + +/* +* ======================================================================== +* PREPROCESSOR MACRO FUNCTIONS USED FOR OPTIMIZATION & CONVENIENCE +* ======================================================================== +*/ + +#define ABS(a) ((a > 0) ? (a) : (-a)) + +#define MIN(a,b) ((a < b) ? (a) : (b)) + +#define MAX(a,b) ((a > b) ? (a) : (b)) + +#define CEIL(a,b) ( (a+b-1) / (b) ) + +#define Mlowcase(C) ( ((C) > 64 && (C) < 91) ? (C) | 32 : (C) ) + +#define Mupcase(C) ( ((C) > 96 && (C) < 123) ? (C) & 0xDF : (C) ) + +#define INDXG2L( iglob, nb, iproc, isrcproc, nprocs )\ + ( (nb) * ( ( (iglob)-1) / ( (nb) * (nprocs) ) ) +\ + ( ( (iglob) - 1 ) % (nb) ) + 1 ) + +#define INDXL2G( iloc, nb, iproc, isrcproc, nprocs )\ + ( (nprocs) * (nb) * ( ( (iloc) - 1 ) / (nb) ) +\ + ( ( (iloc) - 1 ) % (nb) ) +\ + ( ( (nprocs) + (iproc) - (isrcproc) ) % (nprocs) ) * (nb) + 1 ) + +#define INDXG2P( iglob, nb, iproc, isrcproc, nprocs ) \ + ( ( (isrcproc) + ( (iglob) - 1 ) / (nb) ) % (nprocs) ) + +#define MYROC0( nblocks, n, nb, nprocs )\ + ( ( (nblocks) % (nprocs) ) ? ( ( (nblocks) / (nprocs) ) * (nb) + (nb) )\ + : ( ( (nblocks) / (nprocs) )* (nb) + ( (n) % (nb) ) ) ) + +#if( _F2C_CALL_ == _F2C_ADD_ ) +/* +* These defines set up the naming scheme required to have a FORTRAN +* routine call a C routine (which is what the PBLAS are written in). +* No redefinition necessary to have following FORTRAN to C interface: +* FORTRAN CALL C DECLARATION +* call pdgemm(...) void pdgemm_(...) +* +* This is the default. +*/ +#define pbchkvectf pbchkvectf_ +#define fcpsb_errcomm fcpsb_errcomm_ +#define fcpsb_erractionsave fcpsb_erractionsave_ +#define fcpsb_erractionrestore fcpsb_erractionrestore_ +#define fcpsb_perror fcpsb_perror_ +#define fcpsb_serror fcpsb_serror_ +#define fcpsb_errpush fcpsb_errpush_ +#endif + diff --git a/src/internals/psi_compute_size.f90 b/src/internals/psi_compute_size.f90 new file mode 100644 index 00000000..1fe26fb1 --- /dev/null +++ b/src/internals/psi_compute_size.f90 @@ -0,0 +1,102 @@ +subroutine psi_compute_size(desc_data,& + & index_in, dl_lda, info) + + use psb_const_mod + use psb_error_mod + implicit none + + ! ....scalars parameters.... + integer :: info, dl_lda + ! .....array parameters.... + integer :: desc_data(:), index_in(:) + ! ....local scalars.... + integer :: i,npcol,nprow,mycol,myrow,proc,counter, max_index + integer :: icontxt, err, err_act, np + ! ...local array... + integer :: exch(2) + integer :: int_err(5) + integer, pointer :: counter_recv(:), counter_dl(:) + + ! ...parameters + logical, parameter :: debug=.false. + character(len=20) :: name + + name='psi_compute_size' + call psb_get_erraction(err_act) + + info = 0 + icontxt = desc_data(psb_ctxt_) + + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + np=npcol + allocate(counter_dl(0:np-1),counter_recv(0:np-1)) + ! ..initialize counters... + do i=0,np-1 + counter_recv(i)=0 + counter_dl(i)=0 + enddo + + ! ....verify local correctness of halo_in.... + i=1 + do while (index_in(i).ne.-1) + proc=index_in(i) + if ((proc.gt.np-1).or.(proc.lt.0)) then + info = 115 + int_err(1) = 11 + int_err(2) = proc + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + counter_dl(proc)=1 + + ! ..update no of elements to receive from proc proc.. + counter_recv(proc)=counter_recv(proc)+& + & index_in(i+1) + + i=i+index_in(i+1)+2 + enddo + + ! ...computing max_halo: max halo points to be received from + ! same processor + max_index=0 + dl_lda=0 + + do i=0,np-1 + if (counter_recv(i).gt.max_index) max_index = counter_recv(i) + if (counter_dl(i).eq.1) dl_lda = dl_lda+1 + enddo + + ! computing max global value of dl_lda + call igamx2d(icontxt, psb_all_, psb_topdef_, 1, ione, dl_lda, & + &1, counter, counter, -ione ,-ione,-ione) + + if (debug) then + write(0,*) 'psi_compute_size: ',dl_lda + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_serror(icontxt) + return + end if + return + +end subroutine psi_compute_size + + + diff --git a/src/internals/psi_crea_bnd_elem.f90 b/src/internals/psi_crea_bnd_elem.f90 new file mode 100644 index 00000000..8b8fffc7 --- /dev/null +++ b/src/internals/psi_crea_bnd_elem.f90 @@ -0,0 +1,73 @@ +subroutine psi_crea_bnd_elem(desc_a,info) + + use psb_descriptor_type + use psb_error_mod + implicit none + + type(psb_desc_type) :: desc_a + integer, intent(out) :: info + + integer, pointer :: work(:) + integer :: i, j, nr, ns, k, irv, err_act + character(len=20) :: name, ch_err + + info = 0 + name='psi_crea_bnd_elem' + call psb_erractionsave(err_act) + + allocate(work(size(desc_a%halo_index)),stat=info) + if (info /= 0 ) then + info = 4000 + call psb_errpush(info,name) + goto 9999 + end if + + i=0 + j=1 + do while(desc_a%halo_index(j) /= -1) + + nr = desc_a%halo_index(j+1) + ns = desc_a%halo_index(j+1+nr+1) + do k=1, ns + i = i + 1 + work(i) = desc_a%halo_index(j+1+nr+1+k) + enddo + j = j + 1 + ns + 1 + nr + 1 + enddo + + if (i>0) then + call isr(i,work) + j=1 + irv = work(1) + do k=2, i + if (work(k) /= irv) then + irv = work(k) + j = j + 1 + work(j) = work(k) + endif + enddo + else + j = 0 + endif + + allocate(desc_a%bnd_elem(j+1)) + if (.false.) then + desc_a%bnd_elem(1) = j + desc_a%bnd_elem(2:j+1) = work(1:j) + else + desc_a%bnd_elem(1:j) = work(1:j) + desc_a%bnd_elem(j+1) = -1 + endif + + deallocate(work) + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_serror() + return + end if + return +end subroutine psi_crea_bnd_elem diff --git a/src/internals/psi_crea_index.f90 b/src/internals/psi_crea_index.f90 new file mode 100644 index 00000000..d7daddf4 --- /dev/null +++ b/src/internals/psi_crea_index.f90 @@ -0,0 +1,88 @@ +subroutine psi_crea_index(desc_a,index_in,index_out,glob_idx,info) + + use psb_realloc_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in) :: index_in(:) + integer, intent(out) :: index_out(:) + logical :: glob_idx + +! ....local scalars... + integer :: me,npcol,mycol,nprow,i,j,k,& + & mode, int_err(5), err, err_act, np,& + & dl_lda, icontxt +! ...parameters... + integer, pointer :: dep_list(:,:), length_dl(:) + integer,parameter :: root=0,no_comm=-1 + logical,parameter :: debug=.false. + character(len=20) :: name, ch_err + + info = 0 + name='psi_crea_index' + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,np,npcol,me,mycol) + if (np == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ! allocate dependency list + call psi_compute_size(desc_a%matrix_data, index_in, dl_lda, info) + allocate(dep_list(dl_lda,0:np-1),length_dl(0:np-1)) + ! ...extract dependence list (ordered list of identifer process + ! which every process must communcate with... + if (debug) write(*,*) 'crea_halo: calling extract_dep_list' + mode = 1 + call psi_extract_dep_list(desc_a%matrix_data,index_in,& + & dep_list,length_dl,np,dl_lda,mode,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='extrct_dl') + goto 9999 + end if + + if (debug) write(*,*) 'crea_index: from extract_dep_list',& + & me,length_dl(0),index_in(1), ':',dep_list(:length_dl(me),me) + ! ...now process root contains dependence list of all processes... + if (debug) write(*,*) 'crea_halo: root sorting dep list' + + ! ....i must order communication in in halo + call psi_dl_check(dep_list,dl_lda,np,length_dl) + + ! ....now i can sort dependence list...... + call psi_sort_dl(dep_list,length_dl,np,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_sort_dl') + goto 9999 + end if + + ! ...create desc_halo array..... + if(debug) write(0,*)'in psi_crea_index calling psi_desc_index',& + & size(index_out) + call psi_desc_index(desc_a%matrix_data,index_in,dep_list(1,me),& + & length_dl(me),desc_a%loc_to_glob,desc_a%glob_to_loc,& + & index_out,glob_idx) + + deallocate(dep_list,length_dl) + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_serror(icontxt) + return + end if + return +end subroutine psi_crea_index diff --git a/src/internals/psi_crea_ovr_elem.f90 b/src/internals/psi_crea_ovr_elem.f90 new file mode 100644 index 00000000..395cdfe8 --- /dev/null +++ b/src/internals/psi_crea_ovr_elem.f90 @@ -0,0 +1,63 @@ +subroutine psi_crea_ovr_elem(desc_overlap,ovr_elem) + + use psb_realloc_mod + implicit none + + ! ...parameter arrays.... + integer :: desc_overlap(:) + integer, pointer :: ovr_elem(:) + ! ...local scalars... + integer :: i,pnt_new_elem,ret,j, info + integer :: dim_ovr_elem + + ! ...external function... + integer :: psi_exist_ovr_elem,dim + external :: psi_exist_ovr_elem + + logical, parameter :: usetree=.true. + + i=1 + pnt_new_elem=1 + if (usetree) call initpairsearchtree(info) + do while (desc_overlap(i).ne.-1) + ! ...loop over all procs of desc_overlap list.... + + i=i+1 + do j=1,desc_overlap(i) + ! ....loop over all overlap indices referred to act proc..... + if (usetree) then + call searchinskeyval(desc_overlap(i+j),pnt_new_elem,& + & ret,info) + if (ret == pnt_new_elem) ret=-1 + else + ret=psi_exist_ovr_elem(ovr_elem,pnt_new_elem-2,& + & desc_overlap(i+j)) + endif + if (ret.eq.-1) then + + ! ...this point not exist in ovr_elem list: + ! add to it............................. + ovr_elem(pnt_new_elem)=desc_overlap(i+j) + ovr_elem(pnt_new_elem+1)=2 + pnt_new_elem=pnt_new_elem+2 + + ! ...check if overflow element_d array...... + if (pnt_new_elem.gt.dim_ovr_elem) then + dim=(3*size(ovr_elem))/2+2 + write(0,*) 'calling realloc crea_ovr_elem',dim + call psrealloc(dim,ovr_elem,info) + endif + else + ! ....this point already exist in ovr_elem list + ! its position is ret............................ + ovr_elem(ret+1)=ovr_elem(ret+1)+1 + endif + enddo + i=i+2*desc_overlap(i)+2 + enddo + + ! ...add -1 at the end of output list...... + ovr_elem(pnt_new_elem)=-1 + if (usetree) call freepairsearchtree() + +end subroutine psi_crea_ovr_elem diff --git a/src/internals/psi_desc_index.f90 b/src/internals/psi_desc_index.f90 new file mode 100644 index 00000000..e9b8724b --- /dev/null +++ b/src/internals/psi_desc_index.f90 @@ -0,0 +1,232 @@ +subroutine psi_desc_index(desc_data,index_in,dep_list,& + & length_dl,loc_to_glob,glob_to_loc,desc_index,& + & isglob_in,info) + + use psb_realloc_mod + use psb_error_mod + use psb_const_mod + implicit none + + include 'mpif.h' + !c ...array parameters..... + integer :: desc_data(:),index_in(:),dep_list(:) + integer :: loc_to_glob(:),glob_to_loc(:) + integer,pointer :: desc_index(:) + integer :: length_dl, info + logical :: isglob_in + !c ....local scalars... + integer :: j,me,np,npcol,mycol,i,proc,dim + !c ...parameters... + integer, parameter :: ione=1 + integer :: icontxt + integer :: no_comm,err + parameter (no_comm=-1) + !c ...local arrays.. + integer :: int_err(5) + integer,pointer :: brvindx(:),rvsz(:),& + & bsdindx(:),sdsz(:), sndbuf(:), rcvbuf(:) + + integer :: ihinsz,ntot,k,err_act,& + & idxr, idxs, iszs, iszr, nesd, nerv, icomm, iret + + logical,parameter :: debug=.false., usempi=.true. + character(len=20) :: name, ch_err + + info = 0 + name='psi_desc_index' + call psb_erractionsave(err_act) + + !c if mode == 1 then we can use glob_to_loc array + !c else we can't utilize it + icontxt=desc_data(psb_ctxt_) + call blacs_gridinfo(icontxt,np,npcol,me,mycol) + if (np == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) then + write(0,*) me,'start desc_index' + call blacs_barrier(icontxt,'all') + endif + + call blacs_get(icontxt,10,icomm) + !c + !c first, find out the total sizes to be exchanged. + !c note: things marked here as sndbuf/rcvbuf (for mpi) corresponds to things + !c to be received/sent (in the final psblas descriptor). + !c be careful of the inversion + !c + allocate(sdsz(np),rvsz(np),bsdindx(np),brvindx(np),stat=info) + if(info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + sdsz(:) = 0 + rvsz(:) = 0 + bsdindx(:) = 0 + brvindx(:) = 0 + i = 1 + do + if (index_in(i) == -1) exit + proc = index_in(i) + i = i + 1 + nerv = index_in(i) + sdsz(proc+1) = sdsz(proc+1) + nerv + i = i + nerv + 1 + end do + ihinsz=i + call mpi_alltoall(sdsz,1,mpi_integer,rvsz,1,mpi_integer,icomm,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='mpi_alltoall') + goto 9999 + end if + + i = 1 + idxs = 0 + idxr = 0 + do i=1, length_dl + proc = dep_list(i) + bsdindx(proc+1) = idxs + idxs = idxs + sdsz(proc+1) + brvindx(proc+1) = idxr + idxr = idxr + rvsz(proc+1) + end do + iszs = sum(sdsz) + iszr = sum(rvsz) + if ((iszs /= idxs).or.(iszr /= idxr)) then + write(0,*) 'strange results???', iszs,idxs,iszr,idxr + end if + if (debug) then + write(0,*) me,'computed sizes ',iszr,iszs + call blacs_barrier(icontxt,'all') + endif + + ntot = (3*(max(count(sdsz>0),count(rvsz>0)))+ iszs + iszr) + 1 + if (size(desc_index) < ntot) then + !c$$$ write(0,*) 'potential error on desc_index :', + !c$$$ + length_dh, size(desc_index),ntot + write(0,*) 'calling irealloc psi_desc_index ',ntot + call psrealloc(ntot,desc_index,info) + endif + if (info /= 0) then + call psb_errpush(4010,name,a_err='psrealloc') + goto 9999 + end if + + if (debug) then + write(0,*) me,'computed allocated workspace ',iszr,iszs + call blacs_barrier(icontxt,'all') + endif + allocate(sndbuf(iszs),rcvbuf(iszr),stat=info) + if(info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + i = 1 + do + if (i > ihinsz) then + write(0,*) me,' did not find index_in end??? ',i,ihinsz + exit + end if + if (index_in(i) == -1) exit + proc = index_in(i) + i = i + 1 + nerv = index_in(i) + ! c + ! c note that here bsdinx is zero-based, hence the following loop + ! c + if (isglob_in) then + do j=1, nerv + sndbuf(bsdindx(proc+1)+j) = (index_in(i+j)) + end do + else + do j=1, nerv + sndbuf(bsdindx(proc+1)+j) = loc_to_glob(index_in(i+j)) + end do + endif + bsdindx(proc+1) = bsdindx(proc+1) + nerv + i = i + nerv + 1 + end do + + if (debug) then + write(0,*) me,' prepared send buffer ' + call blacs_barrier(icontxt,'all') + endif + !c + !c now have to regenerate bsdindx + !c + idxs = 0 + idxr = 0 + do i=1, length_dl + proc = dep_list(i) + bsdindx(proc+1) = idxs + idxs = idxs + sdsz(proc+1) + brvindx(proc+1) = idxr + idxr = idxr + rvsz(proc+1) + end do + + call mpi_alltoallv(sndbuf,sdsz,bsdindx,mpi_integer,& + & rcvbuf,rvsz,brvindx,mpi_integer,icomm,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='mpi_alltoallv') + goto 9999 + end if + + !c + !c at this point we can finally build the output desc_index. beware + !c of snd/rcv inversion. + !c + i = 1 + do k = 1, length_dl + proc = dep_list(k) + desc_index(i) = proc + i = i + 1 + nerv = sdsz(proc+1) + desc_index(i) = nerv + do j=1, nerv + desc_index(i+j) = glob_to_loc(sndbuf(bsdindx(proc+1)+j)) + end do + i = i + nerv + 1 + nesd = rvsz(proc+1) + desc_index(i) = nesd + do j=1, nesd + desc_index(i+j) = glob_to_loc(rcvbuf(brvindx(proc+1)+j)) + end do + i = i + nesd + 1 + end do + desc_index(i) = - 1 + + deallocate(sdsz,rvsz,bsdindx,brvindx,sndbuf,rcvbuf,stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + if (debug) then + write(0,*) me,'end desc_index' + call blacs_barrier(icontxt,'all') + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_serror(icontxt) + return + end if + return +end subroutine psi_desc_index diff --git a/src/internals/psi_dl_check.f90 b/src/internals/psi_dl_check.f90 new file mode 100644 index 00000000..881adbf4 --- /dev/null +++ b/src/internals/psi_dl_check.f90 @@ -0,0 +1,45 @@ +subroutine psi_dl_check(dep_list,dl_lda,np,length_dl) + + use psb_const_mod + implicit none + + integer :: np,dl_lda,length_dl(:) + integer :: dep_list(dl_lda,0:np-1) + ! locals + integer :: proc, proc2, i, j + +! ...i must order communication in in halo + +! ...if in dep_list of process i there is j +! and in dep_list of process j there isn't i, +! add to it process i... + + do proc=0,np-1 + i=1 + do while (i.le.length_dl(proc)) + proc2=dep_list(i,proc) + if (proc2.ne.psb_no_comm_) then + ! ...search proc in proc2's dep_list.... + j=1 + do while ((j.le.length_dl(proc2).and.& + & dep_list(j,proc2).ne.proc)) + j=j+1 + enddo + if ((dep_list(j,proc2).ne.proc).or.& + & (j.gt.length_dl(proc2))) then + + ! ...proc not found... + ! ...add proc to proc2's dep_list..... + length_dl(proc2)=length_dl(proc2)+1 + if (length_dl(proc2).gt.size(dep_list,1)) then + write(0,*)'error in crea_halo', proc2,& + & length_dl(proc2),'>',size(dep_list,1) + endif + dep_list(length_dl(proc2),proc2)=proc + endif + endif + i=i+1 + enddo + enddo + +end subroutine psi_dl_check diff --git a/src/internals/psi_dswapdata.f90 b/src/internals/psi_dswapdata.f90 new file mode 100644 index 00000000..17b771ae --- /dev/null +++ b/src/internals/psi_dswapdata.f90 @@ -0,0 +1,739 @@ +subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag, n + integer, intent(out) :: info + real(kind(1.d0)) :: y(:,:), beta + real(kind(1.d0)), target ::work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt + integer :: blacs_pnum, krecvid, ksendid + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + logical :: swap_mpi, swap_sync, swap_send, swap_recv + real(kind(1.d0)), pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswap_data' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_double_precision,rcvbuf,rvsz,& + & brvidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call dgesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_dswapdatam + + + + + + + +subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag + integer, intent(out) :: info + real(kind(1.d0)) :: y(:), beta + real(kind(1.d0)), target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt, n + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: blacs_pnum, krecvid, ksendid + integer :: int_err(5) + logical :: swap_mpi, swap_sync, swap_send, swap_recv + real(kind(1.d0)), pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswap_datav' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + n=1 + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_double_precision,rcvbuf,rvsz,& + & brvidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call dgesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_dswapdatav diff --git a/src/internals/psi_dswaptran.f90 b/src/internals/psi_dswaptran.f90 new file mode 100644 index 00000000..8f3d9ec1 --- /dev/null +++ b/src/internals/psi_dswaptran.f90 @@ -0,0 +1,735 @@ +subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag, n + integer, intent(out) :: info + real(kind(1.d0)) :: y(:,:), beta + real(kind(1.d0)), target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + integer :: blacs_pnum, krecvid, ksendid + logical :: swap_mpi, swap_sync, swap_send, swap_recv + real(kind(1.d0)), pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswaptranm' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(rcvbuf,rvsz,brvidx,& + & mpi_double_precision,sndbuf,sdsz,& + & bsdidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call dgesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I receive + snd_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + snd_pt = bsdidx(proc_to_comm) + call dgerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call dgesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = bsdidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + snd_pt = brvidx(proc_to_comm) + call mpi_irecv(sndbuf(rcv_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call dgesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + snd_pt = bsdidx(proc_to_comm) + call dgerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + idx_pt = point_to_proc+nerv+psb_elem_send_ + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_dswaptranm + + + + + + + +subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag + integer, intent(out) :: info + real(kind(1.d0)) :: y(:), beta + real(kind(1.d0)), target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt, n + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + integer :: blacs_pnum, krecvid, ksendid + logical :: swap_mpi, swap_sync, swap_send, swap_recv + real(kind(1.d0)), pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswaptranv' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + n=1 + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(rcvbuf,rvsz,brvidx,& + & mpi_double_precision,sndbuf,sdsz,& + & bsdidx,mpi_double_precision,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call dgesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I receive + snd_pt = brvidx(proc_to_comm) + call dgerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + snd_pt = bsdidx(proc_to_comm) + call dgerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call dgesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = bsdidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + snd_pt = brvidx(proc_to_comm) + call mpi_irecv(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_double_precision,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call dgesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + snd_pt = bsdidx(proc_to_comm) + call dgerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_dswaptranv diff --git a/src/internals/psi_exist_ovr_elem.f b/src/internals/psi_exist_ovr_elem.f new file mode 100644 index 00000000..ff1468cb --- /dev/null +++ b/src/internals/psi_exist_ovr_elem.f @@ -0,0 +1,43 @@ + INTEGER FUNCTION PSI_EXIST_OVR_ELEM(OVR_ELEM, + + DIM_LIST,ELEM_SEARCHED) + +C PURPOSE: +C ======= +C +C If ELEM_SEARCHED exist in the list OVR_ELEM returns its position in +C the list, else returns -1 +C +C +C INPUT +C ====== +C OVRLAP_ELEMENT_D.: Contains for all overlap points belonging to +C the current process: +C 1. overlap point index +C 2. Number of domains sharing that overlap point +C the end is marked by a -1............................... +C +C DIM_LIST..........: Dimension of list OVRLAP_ELEMENT_D +C +C ELEM_SEARCHED.....:point's Local index identifier to be searched. + + IMPLICIT NONE +C ...Array Parameters.... + INTEGER OVR_ELEM(*) + +C ....Scalars parameters.... + INTEGER DIM_LIST,ELEM_SEARCHED + +C ...Local Scalars.... + INTEGER I + + I=1 + DO WHILE ((I.LE.DIM_LIST).AND.(OVR_ELEM(I).NE.ELEM_SEARCHED)) + I=I+2 + ENDDO + IF ((I.LE.DIM_LIST).AND.(OVR_ELEM(I).EQ.ELEM_SEARCHED)) THEN + PSI_EXIST_OVR_ELEM=I + ELSE + PSI_EXIST_OVR_ELEM=-1 + ENDIF + END + diff --git a/src/internals/psi_extrct_dl.f b/src/internals/psi_extrct_dl.f new file mode 100644 index 00000000..ece5fdfa --- /dev/null +++ b/src/internals/psi_extrct_dl.f @@ -0,0 +1,266 @@ + subroutine psi_extract_dep_list(desc_data, + + desc_str,dep_list, + + length_dl,np,dl_lda,mode,info) + +c internal routine +c ================ +c +c _____called by psi_crea_halo and psi_crea_ovrlap ______ +c +c purpose +c ======= +c process root (pid=0) extracts for each process "k" the ordered list of process +c to which "k" must communicate. this list with its order is extracted from +c desc_str list +c +c +c input +c ======= +c desc_data :integer array +c explanation: +c name explanation +c ------------------ ------------------------------------------------------- +c desc_data array of integer that contains some local and global +c information of matrix. +c +c +c now we explain each of the above vectors. +c +c let a be a generic sparse matrix. we denote with matdata_a the matrix_data +c array for matrix a. +c data stored in matrix_data array are: +c +c notation stored in explanation +c --------------- ---------------------- ------------------------------------- +c dec_type matdata_a[psb_dec_type_] decomposition type +c m matdata_a[m_] total number of equations +c n matdata_a[n_] total number of variables +c n_row matdata_a[psb_n_row_] number of local equations +c n_col matdata_a[psb_n_col_] number of local variables +c psb_ctxt_a matdata_a[ctxt_] the blacs context handle, indicating +c the global context of the operation +c on the matrix. +c the context itself is global. +c desc_str integer array +c explanation: +c let desc_str_p be the array desc_str for local process. +c this is composed of variable dimension blocks for each process to +c communicate to. +c each block contain indexes of local halo elements to exchange with other +c process. +c let p be the pointer to the first element of a block in desc_str_p. +c this block is stored in desc_str_p as : +c +c notation stored in explanation +c --------------- --------------------------- ----------------------------------- +c process_id desc_str_p[p+psb_proc_id_] identifier of process which exchange +c data with. +c n_elements_recv desc_str_p[p+n_elem_recv_] number of elements to receive. +c elements_recv desc_str_p[p+elem_recv_+i] indexes of local elements to +c receive. these are stored in the +c array from location p+elem_recv_ to +c location p+elem_recv_+ +c desc_str_p[p+n_elem_recv_]-1. +c if desc_data(psb_dec_type_) == 0 +c then also will be: +c n_elements_send desc_str_p[p+n_elem_send_] number of elements to send. +c elements_send desc_str_p[p+elem_send_+i] indexes of local elements to +c send. these are stored in the +c array from location p+elem_send_ to +c location p+elem_send_+ +c desc_str_p[p+n_elem_send_]-1. +c list is ended by -1 value +c +c np integer (global input) +c number of grid process. +c +c mode integer (global input) +c if mode =0 then will be inserted also duplicate element in +c a same dependence list +c if mode =1 then not will be inserted duplicate element in +c a same dependence list +c output +c ===== +c only for root (pid=0) process: +c dep_list integer array(dl_lda,0:np) +c dependence list dep_list(*,i) is the list of process identifiers to which process i +c must communicate with. this list with its order is extracted from +c desc_str list. +c length_dl integer array(0:np) +c length_dl(i) is the length of dep_list(*,i) list + + implicit none + include 'psb_const.fh' + include 'mpif.h' +c ....scalar parameters... + integer np,dl_lda,mode, info + +c ....array parameters.... + integer desc_str(*),desc_data(*), + + dep_list(dl_lda,0:np),length_dl(0:np) + integer, pointer :: itmp(:) +c .....local arrays.... + integer int_err(5) + double precision real_err(5) + +c .....local scalars... + integer i,nprow,npcol,me,mycol,pointer_dep_list,proc,j,err_act + integer icontxt, err, icomm + logical debug + parameter (debug=.false.) + character name*20 + name='psi_extrct_dl' + call fcpsb_get_erraction(err_act) + + info = 0 + icontxt = desc_data(psb_ctxt_) + + + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + do i=0,np + length_dl(i) = 0 + enddo + i=1 + if (debug) write(0,*) 'extract: info ',info, + + desc_data(psb_dec_type_) + pointer_dep_list=1 + if (desc_data(psb_dec_type_).eq.psb_desc_bld_) then + do while (desc_str(i).ne.-1) + if (debug) write(0,*) me,' extract: looping ',i, + + desc_str(i),desc_str(i+1),desc_str(i+2) + +c ...with different decomposition type we have different +c structure of indices lists............................ + if ((desc_str(i+1).ne.0).or.(desc_str(i+2).ne.0)) then +c ..if number of element to be exchanged !=0 + proc=desc_str(i) + if ((proc.lt.0).or.(proc.ge.nprow)) then + if (debug) write(0,*) 'extract error ',i,desc_str(i) + info = 3999 + goto 998 + endif + if (mode.eq.1) then +c ...search if already exist proc +c in dep_list(*,me)... + j=1 + do while ((j.lt.pointer_dep_list).and. + + (dep_list(j,me).ne.proc)) + j=j+1 + enddo + + if (j.eq.pointer_dep_list) then +c ...if not found..... + dep_list(pointer_dep_list,me)=proc + pointer_dep_list=pointer_dep_list+1 + endif + else if (mode.eq.0) then + if (pointer_dep_list.gt.dl_lda) then + info = 4000 + goto 998 + endif + dep_list(pointer_dep_list,me)=proc + pointer_dep_list=pointer_dep_list+1 + endif + endif + i=i+desc_str(i+1)+2 + enddo + else if (desc_data(psb_dec_type_).eq.psb_desc_upd_) then + do while (desc_str(i).ne.-1) + if (debug) write(0,*) 'extract: looping ',i,desc_str(i) + +c ...with different decomposition type we have different +c structure of indices lists............................ + if (desc_str(i+1).ne.0) then + + proc=desc_str(i) +c ..if number of element to be exchanged !=0 + + if (mode.eq.1) then +c ...search if already exist proc.... + j=1 + do while ((j.lt.pointer_dep_list).and. + + (dep_list(j,me).ne.proc)) + j=j+1 + enddo + if (j.eq.pointer_dep_list) then +c ...if not found..... + if (pointer_dep_list.gt.dl_lda) then + info = 4000 + goto 998 + endif + dep_list(pointer_dep_list,me)=proc + pointer_dep_list=pointer_dep_list+1 + endif + else if (mode.eq.0) then + if (pointer_dep_list.gt.dl_lda) then + info = 4000 + goto 998 + endif + dep_list(pointer_dep_list,me)=proc + pointer_dep_list=pointer_dep_list+1 + endif + endif + i=i+desc_str(i+1)+2 + enddo + else + write(0,*) 'invalid dec_type',desc_data(psb_dec_type_) + info = 2020 + endif + + length_dl(me)=pointer_dep_list-1 + +c ... check for errors... + 998 continue + if (debug) write(0,*) 'extract: info ',info + err = info +c$$$ call igamx2d(icontxt, all, topdef, ione, ione, err, ione, +c$$$ + i, i, -ione ,-ione,-ione) + + if (err.ne.0) goto 9999 + + if (.true.) then + call igsum2d(icontxt,'all',' ',np+1,1,length_dl,np+1,-1,-1) + call blacs_get(icontxt,10,icomm ) + allocate(itmp(dl_lda)) + itmp(1:dl_lda) = dep_list(1:dl_lda,me) + call mpi_allgather(itmp,dl_lda,mpi_integer, + + dep_list,dl_lda,mpi_integer,icomm,info) + deallocate(itmp) + + else + + if (me.eq.root) then + do proc=0,np-1 + if (proc.ne.root) then + if (debug) write(0,*) 'receiving from: ',proc +c ...receive from proc length of its dependence list.... + call igerv2d(icontxt,1,1,length_dl(proc),1, + + proc,mycol) + +c ...receive from proc its dependence list.... + call igerv2d(icontxt,length_dl(proc),1, + + dep_list(1,proc),length_dl(proc),proc,mycol) + + endif + enddo + else if (me.ne.root) then +c ...send to root dependence list length..... + if (debug) write(0,*) 'sending to: ',me,root + call igesd2d(icontxt,1,1,length_dl(me),1,root,mycol) + if (debug) write(0,*) 'sending to: ',me,root +c ...send to root dependence list.... + call igesd2d(icontxt,length_dl(me),1,dep_list(1,me), + + length_dl(me),root,mycol) + + endif + end if + return + + 9999 continue + call fcpsb_errpush(info,name,int_err) + if(err_act.eq.act_abort) then + call fcpsb_perror(icontxt) + endif + return + + end diff --git a/src/internals/psi_gthsct.f90 b/src/internals/psi_gthsct.f90 new file mode 100644 index 00000000..3c6225b5 --- /dev/null +++ b/src/internals/psi_gthsct.f90 @@ -0,0 +1,202 @@ +subroutine psi_dgthm(n,k,idx,x,y) + + implicit none + + integer :: n, k, idx(:) + real(kind(1.d0)) :: x(:,:), y(:) + + ! Locals + integer :: i, j, pt + + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(pt)=x(idx(i),j) + end do + end do + +end subroutine psi_dgthm + + +subroutine psi_dgthv(n,idx,x,y) + + implicit none + + integer :: n, idx(:) + real(kind(1.d0)) :: x(:), y(:) + + ! Locals + integer :: i, j + + do i=1,n + y(i)=x(idx(i)) + end do + +end subroutine psi_dgthv + + +subroutine psi_dsctm(n,k,idx,x,beta,y) + + implicit none + + integer :: n, k, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:,:) + + ! Locals + integer :: i, j, pt + + if (beta.eq.0.d0) then + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = x(pt) + end do + end do + else if (beta.eq.1.d0) then + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = y(idx(i),j)+x(pt) + end do + end do + else + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = beta*y(idx(i),j)+x(pt) + end do + end do + end if +end subroutine psi_dsctm + +subroutine psi_dsctv(n,idx,x,beta,y) + + implicit none + + integer :: n, k, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:) + + ! Locals + integer :: i, j, pt + + if (beta.eq.0.d0) then + do i=1,n + y(idx(i)) = x(i) + end do + else if (beta.eq.1.d0) then + do i=1,n + y(idx(i)) = y(idx(i))+x(i) + end do + else + do i=1,n + y(idx(i)) = beta*y(idx(i))+x(i) + end do + end if +end subroutine psi_dsctv + + + + +subroutine psi_igthm(n,k,idx,x,y) + + implicit none + + integer :: n, k, idx(:) + integer :: x(:,:), y(:) + + ! Locals + integer :: i, j, pt + + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(pt)=x(idx(i),j) + end do + end do + +end subroutine psi_igthm + + +subroutine psi_igthv(n,idx,x,y) + + implicit none + + integer :: n, idx(:) + integer :: x(:), y(:) + + ! Locals + integer :: i, j + + do i=1,n + y(i)=x(idx(i)) + end do + +end subroutine psi_igthv + + +subroutine psi_isctm(n,k,idx,x,beta,y) + + implicit none + + integer :: n, k, idx(:) + integer :: beta, x(:), y(:,:) + + ! Locals + integer :: i, j, pt + + if (beta.eq.0.d0) then + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = x(pt) + end do + end do + else if (beta.eq.1.d0) then + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = y(idx(i),j)+x(pt) + end do + end do + else + pt=0 + do j=1,k + do i=1,n + pt=pt+1 + y(idx(i),j) = beta*y(idx(i),j)+x(pt) + end do + end do + end if +end subroutine psi_isctm + +subroutine psi_isctv(n,idx,x,beta,y) + + implicit none + + integer :: n, k, idx(:) + integer :: beta, x(:), y(:) + + ! Locals + integer :: i, j, pt + + if (beta.eq.0.d0) then + do i=1,n + y(idx(i)) = x(i) + end do + else if (beta.eq.1.d0) then + do i=1,n + y(idx(i)) = y(idx(i))+x(i) + end do + else + do i=1,n + y(idx(i)) = beta*y(idx(i))+x(i) + end do + end if +end subroutine psi_isctv diff --git a/src/internals/psi_iswapdata.f90 b/src/internals/psi_iswapdata.f90 new file mode 100644 index 00000000..d17efecb --- /dev/null +++ b/src/internals/psi_iswapdata.f90 @@ -0,0 +1,739 @@ +subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag, n + integer, intent(out) :: info + integer :: y(:,:), beta + integer, target ::work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt + integer :: blacs_pnum, krecvid, ksendid + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + logical :: swap_mpi, swap_sync, swap_send, swap_recv + integer, pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_iswapdata' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_integer,rcvbuf,rvsz,& + & brvidx,mpi_integer,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd*n-1)) + call igesd2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_iswapdatam + + + + + + + +subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag + integer, intent(out) :: info + integer :: y(:), beta + integer, target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt, n + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: blacs_pnum, krecvid, ksendid + integer :: int_err(5) + logical :: swap_mpi, swap_sync, swap_send, swap_recv + integer, pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_iswapdatav' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + n=1 + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(sndbuf,sdsz,bsdidx,& + & mpi_integer,rcvbuf,rvsz,& + & brvidx,mpi_integer,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I receive + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + rcv_pt = brvidx(proc_to_comm) + call mpi_irecv(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_gth(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & y,sndbuf(snd_pt:snd_pt+nesd-1)) + call igesd2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + rcv_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + idx_pt = point_to_proc+psb_elem_recv_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_iswapdatav diff --git a/src/internals/psi_iswaptran.f90 b/src/internals/psi_iswaptran.f90 new file mode 100644 index 00000000..6d733769 --- /dev/null +++ b/src/internals/psi_iswaptran.f90 @@ -0,0 +1,735 @@ +subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag, n + integer, intent(out) :: info + integer :: y(:,:), beta + integer, target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + integer :: blacs_pnum, krecvid, ksendid + logical :: swap_mpi, swap_sync, swap_send, swap_recv + integer, pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswaptranm' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = n*nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = n*nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(rcvbuf,rvsz,brvidx,& + & mpi_integer,sndbuf,sdsz,& + & bsdidx,mpi_integer,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call igesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I receive + snd_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + snd_pt = bsdidx(proc_to_comm) + call igerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call igesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = bsdidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + snd_pt = brvidx(proc_to_comm) + call mpi_irecv(sndbuf(rcv_pt),sdsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,n,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv*n-1)) + call igesd2d(icontxt,nerv,n,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + snd_pt = bsdidx(proc_to_comm) + call igerv2d(icontxt,nesd,n,sndbuf(snd_pt),nesd,proc_to_comm,0) + idx_pt = point_to_proc+nerv+psb_elem_send_ + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+n*nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,n,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+n*nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_iswaptranm + + + + + + + +subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info) + + use psb_error_mod + use psb_descriptor_type + implicit none + include 'mpif.h' + + integer, intent(in) :: flag + integer, intent(out) :: info + integer :: y(:), beta + integer, target :: work(:) + type(psb_desc_type) :: desc_a + + ! locals + integer :: icontxt, nprow, npcol, myrow,& + & mycol, point_to_proc, nesd, nerv,& + & proc_to_comm, p2ptag, icomm, p2pstat,& + & idxs, idxr, iret, errlen, ifcomm, rank,& + & err_act, totxch, ixrec, i, lw, idx_pt,& + & snd_pt, rcv_pt, n + + integer, pointer, dimension(:) :: bsdidx, brvidx,& + & sdsz, rvsz, prcid, ptp, rvhd, h_idx + integer :: int_err(5) + integer :: blacs_pnum, krecvid, ksendid + logical :: swap_mpi, swap_sync, swap_send, swap_recv + integer, pointer, dimension(:) :: sndbuf, rcvbuf + character(len=20) :: name, ch_err + + info = 0 + name='psi_dswaptranv' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + call blacs_get(icontxt,10,icomm) + + allocate(sdsz(0:nprow-1), rvsz(0:nprow-1), bsdidx(0:nprow-1),& + & brvidx(0:nprow-1), rvhd(0:nprow-1), prcid(0:nprow-1),& + & ptp(0:nprow-1), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + + swap_mpi = iand(flag,psb_swap_mpi_).ne.0 + swap_sync = iand(flag,psb_swap_sync_).ne.0 + swap_send = iand(flag,psb_swap_send_).ne.0 + swap_recv = iand(flag,psb_swap_recv_).ne.0 + h_idx => desc_a%halo_index + idxs = 0 + idxr = 0 + totxch = 0 + point_to_proc = 1 + rvhd(:) = mpi_request_null + n=1 + + ! prepare info for communications + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm.ne.-1) + if(proc_to_comm .ne. myrow) totxch = totxch+1 + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + prcid(proc_to_comm) = blacs_pnum(icontxt,proc_to_comm,mycol) + ptp(proc_to_comm) = point_to_proc + + brvidx(proc_to_comm) = idxr + rvsz(proc_to_comm) = nerv + idxr = idxr+rvsz(proc_to_comm) + + bsdidx(proc_to_comm) = idxs + sdsz(proc_to_comm) = nesd + idxs = idxs+sdsz(proc_to_comm) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + if((idxr+idxs).lt.size(work)) then + sndbuf => work(1:idxs) + rcvbuf => work(idxs+1:idxs+idxr) + else + allocate(sndbuf(idxs),rcvbuf(idxr), stat=info) + if(info.ne.0) then + call psb_errpush(4000,name) + goto 9999 + end if + end if + + ! Case SWAP_MPI + if(swap_mpi) then + + ! gather elements into sendbuffer for swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! swap elements using mpi_alltoallv + call mpi_alltoallv(rcvbuf,rvsz,brvidx,& + & mpi_integer,sndbuf,sdsz,& + & bsdidx,mpi_integer,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! scatter elements from receivebuffer after swapping + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_sync) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + if (proc_to_comm .lt. myrow) then + ! First I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call igesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + ! Then I receive + snd_pt = brvidx(proc_to_comm) + call igerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + else if (proc_to_comm .gt. myrow) then + ! First I receive + snd_pt = bsdidx(proc_to_comm) + call igerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + ! Then I send + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call igesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + else if (proc_to_comm .eq. myrow) then + ! I send to myself + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = bsdidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + snd_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send .and. swap_recv) then + + ! First I post all the non blocking receives + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + p2ptag = krecvid(icontxt,proc_to_comm,myrow) + snd_pt = brvidx(proc_to_comm) + call mpi_irecv(sndbuf(snd_pt),sdsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag, icomm,rvhd(proc_to_comm),iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + ! Then I post all the blocking sends + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + + if(proc_to_comm .ne. myrow) then + p2ptag=ksendid(icontxt,proc_to_comm,myrow) + call mpi_send(rcvbuf(rcv_pt),rvsz(proc_to_comm),& + & mpi_integer,prcid(proc_to_comm),& + & p2ptag,icomm,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end if + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + do i=1, totxch + call mpi_waitany(nprow,rvhd,ixrec,p2pstat,iret) + if(iret.ne.mpi_success) then + int_err(1) = iret + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ixrec .ne. mpi_undefined) then + ixrec=ixrec-1 ! mpi_waitany returns an 1 to nprow index + point_to_proc = ptp(ixrec) + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = bsdidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + else + int_err(1) = ixrec + info=400 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + end do + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm .eq. myrow) then + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_send) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_gth(nerv,h_idx(idx_pt:idx_pt+nerv-1),& + & y,rcvbuf(rcv_pt:rcv_pt+nerv-1)) + call igesd2d(icontxt,nerv,1,rcvbuf(rcv_pt),nerv,proc_to_comm,0) + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + else if (swap_recv) then + + point_to_proc = 1 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + do while (proc_to_comm .ne. -1) + nerv = h_idx(point_to_proc+psb_n_elem_recv_) + nesd = h_idx(point_to_proc+nerv+psb_n_elem_send_) + + if(proc_to_comm.ne.myrow) then + snd_pt = bsdidx(proc_to_comm) + call igerv2d(icontxt,nesd,1,sndbuf(snd_pt),nesd,proc_to_comm,0) + idx_pt = point_to_proc+psb_elem_recv_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & sndbuf(snd_pt:snd_pt+nesd-1),beta,y) + else + idx_pt = point_to_proc+nerv+psb_elem_send_ + rcv_pt = brvidx(proc_to_comm) + call psi_sct(nesd,h_idx(idx_pt:idx_pt+nesd-1),& + & rcvbuf(rcv_pt:rcv_pt+nerv-1),beta,y) + end if + + point_to_proc = point_to_proc+nerv+nesd+3 + proc_to_comm = h_idx(point_to_proc+psb_proc_id_) + end do + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psi_iswaptranv diff --git a/src/internals/psi_list_search.f b/src/internals/psi_list_search.f new file mode 100644 index 00000000..8e2e41f6 --- /dev/null +++ b/src/internals/psi_list_search.f @@ -0,0 +1,25 @@ + INTEGER FUNCTION PSI_LIST_SEARCH(LIST,LENGHT_LIST,ELEM) +C !RETURNS POSITION OF ELEM IN A ARRAY LIST +C !OF LENGHT LENGHT_LIST, IF THIS ELEMENT NOT EXISTS +C !RETURNS -1 + INTEGER LIST(*) + INTEGER LENGHT_LIST + INTEGER ELEM + + INTEGER I + + I=1 + DO WHILE ((I.LE.LENGHT_LIST).AND.(LIST(I).NE.ELEM)) + I=I+1 + ENDDO + IF (I.LE.LENGHT_LIST) THEN + IF (LIST(I).EQ.ELEM) THEN + PSI_LIST_SEARCH=I + ELSE + PSI_LIST_SEARCH=-1 + ENDIF + ELSE + PSI_LIST_SEARCH=-1 + ENDIF + END + diff --git a/src/internals/psi_sort_dl.f90 b/src/internals/psi_sort_dl.f90 new file mode 100644 index 00000000..d13d2766 --- /dev/null +++ b/src/internals/psi_sort_dl.f90 @@ -0,0 +1,60 @@ +subroutine psi_sort_dl(dep_list,l_dep_list,np,info) + ! + ! interface between former sort_dep_list subroutine + ! and new srtlist + ! + use psb_error_mod + implicit none + + integer :: np,dep_list(:,:), l_dep_list(:) + integer :: idg, iupd, idgp, iedges, iidx, iich,ndgmx, isz, err_act + integer :: i, info + integer, pointer :: work(:) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + name='psi_sort_dl' + info=0 + call psb_erractionsave(err_act) + + info = 0 + ndgmx = 0 + do i=1,np + ndgmx = ndgmx + l_dep_list(i) + if (debug) write(0,*) i,l_dep_list(i) + enddo + idg = 1 + iupd = idg+np + idgp = iupd+np + iedges = idgp + ndgmx + iidx = iedges + 2*ndgmx + iich = iidx + ndgmx + isz = iich + ndgmx + if (debug)write(0,*) 'psi_sort_dl: ndgmx ',ndgmx,isz + + allocate(work(isz)) + ! call srtlist(dep_list, dl_lda, l_dep_list, np, info) + call srtlist(dep_list,size(dep_list,1),l_dep_list,np,work(idg),& + & work(idgp),work(iupd),work(iedges),work(iidx),work(iich),info) + + if (info .ne. 0) then + call psb_errpush(4010,name,a_err='srtlist') + goto 9999 + endif + + deallocate(work) + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psi_sort_dl + + + diff --git a/src/internals/srtlist.f b/src/internals/srtlist.f new file mode 100644 index 00000000..28b699a9 --- /dev/null +++ b/src/internals/srtlist.f @@ -0,0 +1,177 @@ +*********************************************************************** +* * +* The communication step among processors at each * +* matrix-vector product is a variable all-to-all * +* collective communication that we reimplement * +* in terms of point-to-point communications. * +* The data in input is a list of dependencies: * +* for each node a list of all the nodes it has to * +* communicate with. The lists are guaranteed to be * +* symmetric, i.e. for each pair (I,J) there is a * +* pair (J,I). The idea is to organize the ordering * +* so that at each communication step as many * +* processors as possible are communicating at the * +* same time, i.e. a step is defined by the fact * +* that all edges (I,J) in it have no common node. * +* * +* Formulation of the problem is: * +* Given an undirected graph (forest): * +* Find the shortest series of steps to cancel all * +* graph edges, where at each step all edges belonging * +* to a matching in the graph are canceled. * +* * +* An obvious lower bound to the optimum number of steps * +* is the largest degree of any node in the graph. * +* * +* The algorithm proceeds as follows: * +* 1. Build a list of all edges, e.g. copy the * +* dependencies lists keeping only (I,J) with I). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! istop - integer(optional). The stopping criterium. +! +subroutine psb_dbicg(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err, itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + implicit none + +!!$ parameters + type(psb_dspmat_type), intent(in) :: a + type(psb_dprec_type), intent(in) :: prec + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(in) :: b(:) + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(in) :: eps + integer, intent(out) :: info + integer, optional, intent(in) :: itmax, itrace, istop + integer, optional, intent(out) :: iter + real(kind(1.d0)), optional, intent(out) :: err +!!$ local data + real(kind(1.d0)), pointer :: aux(:),wwrk(:,:) + real(kind(1.d0)), pointer :: ww(:), q(:),& + & r(:), p(:), zt(:), pt(:), z(:), rt(:),qt(:) + integer, pointer :: iperm(:), ipnull(:), ipsave(:), int_err(:) + real(kind(1.d0)) ::rerr + integer ::litmax, liter, naux, m, mglob, it, itrac,& + & nprows,npcols,me,mecol, n_row, n_col, listop, err_act + character ::diagl, diagu + logical, parameter :: debug = .false. + logical, parameter :: exchange=.true., noexchange=.false. + integer, parameter :: ione=1 + integer, parameter :: irmax = 8 + integer :: itx, i, isvch, ich, icontxt + logical :: do_renum_left + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + real(kind(1.d0)) :: alpha, beta, rho, rho_old, rni, xni, bni, ani,& + & sigma, omega, tau,bn2 + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dbicg' + call psb_erractionsave(err_act) + + if (debug) write(*,*) 'entering psb_dbicg' + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprows,npcols,me,mecol) + if (debug) write(*,*) 'psb_dbicg: from gridinfo',nprows,npcols,me + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! ensure global coherence for convergence checks. + call blacs_get(icontxt,16,isvch) + ich = 1 + call blacs_set(icontxt,16,ich) + + + if (present(istop)) then + listop = istop + else + listop = 1 + endif + ! + ! listop = 1: normwise backward error, infinity norm + ! listop = 2: ||r||/||b|| norm 2 + ! +!!$ +!!$ if ((prec%prec < min_prec_).or.(prec%prec > max_prec_) ) then +!!$ write(0,*) 'f90_bicg: invalid iprec',prec%prec +!!$ if (present(ierr)) ierr=-1 +!!$ return +!!$ endif + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_bicg: invalid istop',listop + info=5001 + int_err=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + naux=4*n_col + + allocate(aux(naux),stat=info) + call psb_dalloc(mglob,9,wwrk,desc_a,info) + call psb_asb(wwrk,desc_a,info) + if(info.ne.0) then + info=4011 + ch_err='psb_asb' + err=info + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + q => wwrk(:,1) + qt => wwrk(:,2) + r => wwrk(:,3) + rt => wwrk(:,4) + p => wwrk(:,5) + pt => wwrk(:,6) + z => wwrk(:,7) + zt => wwrk(:,8) + ww => wwrk(:,9) + + if (present(itmax)) then + litmax = itmax + else + litmax = 1000 + endif + + if (present(itrace)) then + itrac = itrace + else + itrac = -1 + end if + + diagl = 'u' + diagu = 'u' + itx = 0 + + if (listop == 1) then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + else if (listop == 2) then + bn2 = psb_nrm2(b,desc_a,info) + endif + + if(info.ne.0) then + info=4011 + err=info + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + restart: do +!!$ +!!$ r0 = b-ax0 +!!$ + if (itx.ge.itmax) exit restart + it = 0 + call psb_axpby(one,b,zero,r,desc_a,info) + call psb_spmm(-one,a,x,one,r,desc_a,info,work=aux) + call psb_axpby(one,r,zero,rt,desc_a,info) + if(info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + rho = zero + if (debug) write(*,*) 'on entry to amax: b: ',size(b) + if (listop == 1) then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + else if (listop == 2) then + rni = psb_nrm2(r,desc_a,info) + endif + if(info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + if (listop == 1) then + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + if (me.eq.0) write(itrac,'(a,i4,5(2x,es10.4))') 'bicg: ',itx,rerr,rni,bni,& + &xni,ani + endif + else if (listop == 2) then + rerr = rni/bn2 + if (itrac /= -1) then + if (me.eq.0) write(itrac,'(a,i4,3(2x,es10.4))') 'bicg: ',itx,rerr,rni,bn2 + endif + endif + + if(info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + if (rerr<=eps) then + exit restart + end if + + iteration: do + it = it + 1 + itx = itx + 1 + if (debug) write(*,*) 'iteration: ',itx + + call psb_prcaply(prec,r,z,desc_a,info,work=aux) + call psb_prcaply(prec,rt,zt,desc_a,info,trans='t',work=aux) + + rho_old = rho + rho = psb_dot(rt,z,desc_a,info) + if (rho==zero) then + if (debug) write(0,*) 'bicg itxation breakdown r',rho + exit iteration + endif + + if (it==1) then + call psb_axpby(one,z,zero,p,desc_a,info) + call psb_axpby(one,zt,zero,pt,desc_a,info) + else + beta = (rho/rho_old) + call psb_axpby(one,z,beta,p,desc_a,info) + call psb_axpby(one,zt,beta,pt,desc_a,info) + end if + + call psb_spmm(one,a,p,zero,q,desc_a,info,& + & work=aux) + call psb_spmm(one,a,pt,zero,qt,desc_a,info,& + & work=aux,trans='t') + + sigma = psb_dot(pt,q,desc_a,info) + if (sigma==zero) then + if (debug) write(0,*) 'cgs iteration breakdown s1', sigma + exit iteration + endif + + alpha = rho/sigma + + + call psb_axpby(alpha,p,one,x,desc_a,info) + call psb_axpby(-alpha,q,one,r,desc_a,info) + call psb_axpby(-alpha,qt,one,rt,desc_a,info) + + + if (listop == 1) then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + else if (listop == 2) then + rni = psb_nrm2(r,desc_a,info) + endif + + if (listop == 1) then + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + if (me.eq.0) write(itrac,'(a,i4,5(2x,es10.4))') 'bicg: ',itx,rerr,rni,bni,& + &xni,ani + endif + else if (listop == 2) then + rerr = rni/bn2 + if (itrac /= -1) then + if (me.eq.0) write(itrac,'(a,i4,3(2x,es10.4))') 'bicg: ',itx,rerr,rni,bn2 + endif + endif + if (rerr<=eps) then + exit restart + end if + if (itx.ge.itmax) exit restart + end do iteration + end do restart + + if (present(err)) err=rerr + if (present(iter)) iter = itx + if (rerr>eps) then + write(0,*) 'bicg failed to converge to ',eps,& + & ' in ',itx,' iterations ' + end if + + + deallocate(aux) + call psb_free(wwrk,desc_a,info) + ! restore external global coherence behaviour + call blacs_set(icontxt,16,isvch) + + if(info/=0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbicg + + diff --git a/src/methd/psb_dcg.f90 b/src/methd/psb_dcg.f90 new file mode 100644 index 00000000..b06f8cb1 --- /dev/null +++ b/src/methd/psb_dcg.f90 @@ -0,0 +1,283 @@ +! File: psb_dcg.f90 +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dcg.f90 +! +! Subroutine: psb_dcg +! This subroutine implements the Conjugate Gradient method. +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_dcg(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err, itrace, istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + implicit none + +!!$ Parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dprec_type), Intent(in) :: prec + Type(psb_desc_type), Intent(in) :: desc_a + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ Local data + real(kind(1.d0)), pointer :: aux(:), q(:), p(:),& + & r(:), z(:), w(:), wwrk(:,:) + real(kind(1.d0)) ::rerr + real(kind(1.d0)) ::alpha, beta, rho, rho_old, rni, xni, bni, ani,bn2,& + & sigma + integer :: litmax, liter, listop, naux, m, mglob, it, itrac,& + & nprows,npcols,me,mecol, n_col, isvch, ich, icontxt, n_row,err_act, int_err(5) + character ::diagl, diagu + logical, parameter :: exchange=.true., noexchange=.false. + integer, parameter :: ione=1 + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dcg' + call psb_erractionsave(err_act) + + + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprows,npcols,me,mecol) + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + + if (present(istop)) then + listop = istop + else + listop = 1 + endif + ! + ! LISTOP = 1: Normwise backward error, infinity norm + ! LISTOP = 2: ||r||/||b|| norm 2 + ! + +!!$ If ((prec%prec < min_prec_).Or.(prec%prec > max_prec_) ) Then +!!$ Write(0,*) 'F90_CG: Invalid IPREC',prec%prec +!!$ If (Present(ierr)) ierr=-1 +!!$ Return +!!$ Endif + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_cg: invalid istop',listop + info=5001 + int_err(1)=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + naux=4*n_col + allocate(aux(naux), stat=info) + call psb_dalloc(mglob,5,wwrk,desc_a,info) + call psb_asb(wwrk,desc_a,info) + if (info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + p => wwrk(:,1) + q => wwrk(:,2) + r => wwrk(:,3) + z => wwrk(:,4) + w => wwrk(:,5) + + + if (present(itmax)) then + litmax = itmax + else + litmax = 1000 + endif + + if (present(itrace)) then + itrac = itrace + else + itrac = -1 + end if + +!!$ DIAGL = 'U' +!!$ DIAGU = 'R' + + ! Ensure global coherence for convergence checks. + call blacs_get(icontxt,16,isvch) + ich = 1 + call blacs_set(icontxt,16,ich) + +!!$ +!!$ r0 = b-Ax0 +!!$ + call psb_axpby(one,b,zero,r,desc_a,info) + call psb_spmm(-one,a,x,one,r,desc_a,info,work=aux) + if (info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + rho = zero + if (listop == 1) then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + else if (listop == 2) then + bn2 = psb_nrm2(b,desc_a,info) + endif + if (info.ne.0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + + iteration: do it = 1, itmax + +!!$ +!!$ solve mz = r +!!$ Note: the overlapped preconditioner (if overlap is non empty) +!!$ is non-symmetric: M^{-1} = \Lambda P^T K^{-1} P +!!$ For CG we use instead +!!$ M^{-1} = \sqrt{\Lambda} P^T K^{-1} P \sqrt{\Lambda} +!!$ Keep track of the old symmetrized stuf, might come in useful. +!!$ CALL F90_PSAXPBY(ONE,R,ZERO,Z,DECOMP_DATA) +!!$ CALL F90_PSOVRL(Z,DECOMP_DATA,& +!!$ & UPDATE_TYPE=SQUARE_ROOT_,CHOICE=NOEXCHANGE) +!!$ CALL F90_PSSPSM(ONE,L,Z,ZERO,W,DECOMP_DATA,& +!!$ & TRANS='N',UNIT=DIAGL,CHOICE=NONE_,WORK=AUX) +!!$ CALL F90_PSSPSM(ONE,U,W,ZERO,Z,DECOMP_DATA,& +!!$ & TRANS='N',UNIT=DIAGU,CHOICE=NONE_,DIAG=VDIAG,WORK=AUX) +!!$ CALL F90_PSOVRL(Z,DECOMP_DATA,& +!!$ & UPDATE_TYPE=SQUARE_ROOT_) +!!$ CALL F90_PSHALO(Z,DECOMP_DATA) + Call psb_prcaply(prec,r,z,desc_a,info,work=aux) + rho_old = rho + rho = f90_psdot(r,z,desc_a,info) + + if (it==1) then + call psb_axpby(one,z,zero,p,desc_a,info) + else + if (rho_old==zero) then + write(0,*) 'CG Iteration breakdown' + exit iteration + endif + beta = rho/rho_old + call psb_axpby(one,z,beta,p,desc_a,info) + end if + + call psb_spmm(one,a,p,zero,q,desc_a,info,work=aux) + sigma = psb_dot(p,q,desc_a,info) + if (sigma==zero) then + write(0,*) 'CG Iteration breakdown' + exit iteration + endif + + alpha = rho/sigma + call psb_axpby(alpha,p,one,x,desc_a,info) + call psb_axpby(-alpha,q,one,r,desc_a,info) + + + if (listop == 1) Then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + If (itrac /= -1) Then + If (me.Eq.0) Write(itrac,'(a,i4,5(2x,es10.4))') 'cg: ',it,rerr,rni,bni,& + &xni,ani + Endif + + Else If (listop == 2) Then + + rni = psb_nrm2(r,desc_a,info) + rerr = rni/bn2 + If (itrac /= -1) Then + If (me.Eq.0) Write(itrac,'(a,i4,3(2x,es10.4)))') 'cg: ',it,rerr,rni,bn2 + Endif + Endif + + if (rerr<=eps) then + exit iteration + end if + end do iteration + + if (present(err)) err=rerr + if (present(iter)) iter = it + if (rerr>eps) then + write(0,*) 'CG Failed to converge to ',eps,& + & ' in ',litmax,' iterations ' + info=it + end if + + deallocate(aux) + call psb_free(wwrk,desc_a,info) + ! restore external global coherence behaviour + call blacs_set(icontxt,16,isvch) + + if (info.ne.0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dcg + + diff --git a/src/methd/psb_dcgs.f90 b/src/methd/psb_dcgs.f90 new file mode 100644 index 00000000..e972616e --- /dev/null +++ b/src/methd/psb_dcgs.f90 @@ -0,0 +1,332 @@ +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dcgs.f90 +! +! Subroutine: psb_dcgs +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_dcgs(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + implicit none + +!!$ parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + Type(psb_dprec_type), Intent(in) :: prec + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ local data + Real(Kind(1.d0)), Pointer :: aux(:),wwrk(:,:) + Real(Kind(1.d0)), Pointer :: ww(:), q(:),& + & r(:), p(:), v(:), s(:), t(:), z(:), f(:), rt(:),qt(:),uv(:) + Integer, Pointer :: iperm(:), ipnull(:), ipsave(:) + Real(Kind(1.d0)) ::rerr + Integer ::litmax, liter, naux, m, mglob, it, itrac,int_err(5),& + & nprows,npcols,me,mecol, n_row, n_col,listop, err_act + Character ::diagl, diagu + Logical, Parameter :: exchange=.True., noexchange=.False. + Integer, Parameter :: ione=1 + Integer, Parameter :: irmax = 8 + Integer :: itx, i, isvch, ich, icontxt + Logical :: do_renum_left + Logical, Parameter :: debug = .false. + Real(Kind(1.d0)), Parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + Real(Kind(1.d0)) :: alpha, beta, rho, rho_old, rni, xni, bni, ani,bn2,& + & sigma, omega, tau + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dcgs' + call psb_erractionsave(err_act) + + If (debug) Write(*,*) 'entering psb_dcgs' + icontxt = desc_a%matrix_data(psb_ctxt_) + Call blacs_gridinfo(icontxt,nprows,npcols,me,mecol) + If (debug) Write(*,*) 'psb_dcgs: from gridinfo',nprows,npcols,me + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + If (Present(istop)) Then + listop = istop + Else + listop = 1 + Endif +! +! listop = 1: normwise backward error, infinity norm +! listop = 2: ||r||/||b|| norm 2 +! +!!$ +!!$ If ((prec%prec < 0).Or.(prec%prec > 6) ) Then +!!$ Write(0,*) 'f90_cgstab: invalid iprec',prec%prec +!!$ If (Present(ierr)) ierr=-1 +!!$ Return +!!$ Endif + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_cgs: invalid istop',listop + info=5001 + int_err=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + naux=4*n_col + Allocate(aux(naux),stat=info) + + Call psb_alloc(mglob,11,wwrk,desc_a,info) + Call psb_asb(wwrk,desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + q => wwrk(:,1) + qt => wwrk(:,2) + r => wwrk(:,3) + rt => wwrk(:,4) + p => wwrk(:,5) + v => wwrk(:,6) + uv => wwrk(:,7) + z => wwrk(:,8) + f => wwrk(:,9) + s => wwrk(:,10) + ww => wwrk(:,11) + + + If (Present(itmax)) Then + litmax = itmax + Else + litmax = 1000 + Endif + + If (Present(itrace)) Then + itrac = itrace + Else + itrac = -1 + End If + + ! ensure global coherence for convergence checks. + Call blacs_get(icontxt,16,isvch) + ich = 1 + Call blacs_set(icontxt,16,ich) + + diagl = 'u' + diagu = 'u' + itx = 0 + + if (listop == 1) then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + else if (listop == 2) then + bn2 = psb_nrm2(b,desc_a,info) + endif + if(info/=0)then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + restart: Do +!!$ +!!$ r0 = b-ax0 +!!$ + If (itx.Ge.itmax) Exit restart + it = 0 + Call psb_axpby(one,b,zero,r,desc_a,info) + Call psb_spmm(-one,a,x,one,r,desc_a,info,work=aux) + Call psb_axpby(one,r,zero,rt,desc_a,info) + if(info/=0)then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + rho = zero + If (debug) Write(*,*) 'on entry to amax: b: ',Size(b) + + if (listop == 1) then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'cgs: ',& + & itx,rerr,rni,bni,xni,ani + endif + else if (listop == 2) then + rni = psb_nrm2(r,desc_a,info) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'cgs: ',itx,rerr,rni,bn2 + endif + endif + if(info/=0)then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + If (rerr<=eps) Then + Exit restart + End If + + iteration: Do + it = it + 1 + itx = itx + 1 + If (debug) Write(*,*) 'iteration: ',itx + rho_old = rho + rho = psb_dot(rt,r,desc_a,info) + If (rho==zero) Then + If (debug) Write(0,*) 'cgs iteration breakdown r',rho + Exit iteration + Endif + + If (it==1) Then + Call psb_axpby(one,r,zero,uv,desc_a,info) + Call psb_axpby(one,r,zero,p,desc_a,info) + Else + beta = (rho/rho_old) + Call psb_axpby(one,r,zero,uv,desc_a,info) + Call psb_axpby(beta,q,one,uv,desc_a,info) + Call psb_axpby(one,q,beta,p,desc_a,info) + Call psb_axpby(one,uv,beta,p,desc_a,info) + + End If + + Call psb_prcaply(prec,p,f,desc_a,info,work=aux) + + Call psb_spmm(one,a,f,zero,v,desc_a,info,& + & work=aux) + + sigma = psb_dot(rt,v,desc_a,info) + If (sigma==zero) Then + If (debug) Write(0,*) 'cgs iteration breakdown s1', sigma + Exit iteration + Endif + + alpha = rho/sigma + + Call psb_axpby(one,uv,zero,q,desc_a,info) + Call psb_axpby(-alpha,v,one,q,desc_a,info) + Call psb_axpby(one,uv,zero,s,desc_a,info) + Call psb_axpby(one,q,one,s,desc_a,info) + + Call psb_prcaply(prec,s,z,desc_a,info,work=aux) + + Call psb_axpby(alpha,z,one,x,desc_a,info) + + Call psb_spmm(one,a,z,zero,qt,desc_a,info,& + & work=aux) + + Call psb_axpby(-alpha,qt,one,r,desc_a,info) + + + if (listop == 1) then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'cgs: ',& + & itx,rerr,rni,bni,xni,ani + endif + + else if (listop == 2) then + + rni = psb_nrm2(r,desc_a,info) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'cgs: ',& + & itx,rerr,rni,bn2 + endif + endif + + If (rerr<=eps) Then + Exit restart + End If + If (itx.Ge.itmax) Exit restart + End Do iteration + End Do restart + + If (Present(err)) err=rerr + If (Present(iter)) iter = itx + If (rerr>eps) Then + Write(0,*) 'cgs failed to converge to ',eps,& + & ' in ',itx,' iterations ' + End If + + Deallocate(aux) + Call psb_dsfree(wwrk,desc_a,info) + ! restore external global coherence behaviour + Call blacs_set(icontxt,16,isvch) + + if(info/=0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +End Subroutine psb_dcgs + + diff --git a/src/methd/psb_dcgstab.f90 b/src/methd/psb_dcgstab.f90 new file mode 100644 index 00000000..9c7e1221 --- /dev/null +++ b/src/methd/psb_dcgstab.f90 @@ -0,0 +1,365 @@ +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dcgstab.f90 +! +! Subroutine: psb_dcgstab +! This subroutine implements the CG Stabilized method. +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_dcgstab(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace, istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + Implicit None +!!$ parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dprec_type), Intent(in) :: prec + Type(psb_desc_type), Intent(in) :: desc_a + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ Local data + Real(Kind(1.d0)), Pointer :: aux(:),wwrk(:,:) + Real(Kind(1.d0)), Pointer :: q(:),& + & r(:), p(:), v(:), s(:), t(:), z(:), f(:) + Integer, Pointer :: iperm(:), ipnull(:), ipsave(:) + Real(Kind(1.d0)) ::rerr + Integer ::litmax, liter, naux, m, mglob, it,itrac,& + & nprows,npcols,me,mecol, n_row, n_col + Character ::diagl, diagu + Logical, Parameter :: debug = .false. + Logical, Parameter :: exchange=.True., noexchange=.False., debug1 = .False. + Integer, Parameter :: ione=1 + Integer, Parameter :: irmax = 8 + Integer :: itx, i, isvch, ich, icontxt, err_act, int_err(5) + Integer :: listop + Logical :: do_renum_left + Real(Kind(1.d0)), Parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + Real(Kind(1.d0)) :: alpha, beta, rho, rho_old, rni, xni, bni, ani,& + & sigma, omega, tau, rn0, bn2 +!!$ Integer istpb, istpe, ifctb, ifcte, imerr, irank, icomm,immb,imme +!!$ Integer mpe_log_get_event_number,mpe_Describe_state,mpe_log_event + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dcgstab' + call psb_erractionsave(err_act) + + If (debug) Write(*,*) 'Entering PSB_DCGSTAB',present(istop) + icontxt = desc_a%MATRIX_DATA(CTXT_) + CALL BLACS_GRIDINFO(icontxt,NPROWS,NPCOLS,ME,MECOL) + if (debug) write(*,*) 'PSB_DCGSTAB: From GRIDINFO',nprows,npcols,me + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + If (Present(istop)) Then + listop = istop + Else + listop = 1 + Endif +! +! LISTOP = 1: Normwise backward error, infinity norm +! LISTOP = 2: ||r||/||b|| norm 2 +! + + If ((prec%prec < min_prec_).Or.(prec%prec > max_prec_) ) Then + Write(0,*) 'PSB_CGSTAB: Invalid IPREC',prec%prec + info=5002 + int_err(1)=prec%prec + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + Endif + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_bicgstab: invalid istop',listop + info=5001 + int_err(1)=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + naux=6*n_col + allocate(aux(naux),stat=info) + call psb_alloc(mglob,8,wwrk,desc_a,info) + call psb_asb(wwrk,desc_a,info) + if (info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + Q => WWRK(:,1) + R => WWRK(:,2) + P => WWRK(:,3) + V => WWRK(:,4) + F => WWRK(:,5) + S => WWRK(:,6) + T => WWRK(:,7) + Z => WWRK(:,8) + + If (Present(itmax)) Then + litmax = itmax + Else + litmax = 1000 + Endif + + If (Present(itrace)) Then + itrac = itrace + Else + itrac = -1 + End If + + diagl = 'U' + diagu = 'U' + + ! Ensure global coherence for convergence checks. + Call blacs_get(icontxt,16,isvch) + ich = 1 + Call blacs_set(icontxt,16,ich) + + itx = 0 + + If (listop == 1) Then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + Else If (listop == 2) Then + bn2 = psb_nrm2(b,desc_a,info) + Endif + if (info /= 0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + restart: Do +!!$ +!!$ r0 = b-Ax0 +!!$ + If (itx >= itmax) Exit restart + it = 0 + Call psb_axpby(one,b,zero,r,desc_a,info) +!!$ imerr = MPE_Log_event( immb, 0, "st SPMM" ) + Call psb_spmm(-one,a,x,one,r,desc_a,info,work=aux) +!!$ imerr = MPE_Log_event( imme, 0, "ed SPMM" ) + Call psb_axpby(one,r,zero,q,desc_a,info) + if (info /= 0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + rho = zero + If (debug) Write(*,*) 'On entry to AMAX: B: ',Size(b) + +! +! Must always provide norm of R into RNI below for first check on +! residual +! + If (listop == 1) Then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + Else If (listop == 2) Then + rni = psb_nrm2(r,desc_a,info) + Endif + if (info /= 0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + If (itx == 0) Then + rn0 = rni + End If + If (rn0 == 0.d0 ) Then + If (itrac /= -1) Then + If (me == 0) Write(itrac,*) 'BiCGSTAB: ',itx,rn0 + Endif + Exit restart + End If + + If (listop == 1) Then + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + If (itrac /= -1) Then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'bicgstab: ',itx,rerr,rni,bni,& + &xni,ani + Endif + Else If (listop == 2) Then + rerr = rni/bn2 + If (itrac /= -1) Then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'bicgstab: ',itx,rerr,rni,bn2 + Endif + Endif + if (info /= 0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + If (rerr<=eps) Then + Exit restart + End If + + iteration: Do + it = it + 1 + itx = itx + 1 + If (debug) Write(*,*) 'Iteration: ',itx + rho_old = rho + rho = psb_dot(q,r,desc_a,info) + If (rho==zero) Then + If (debug) Write(0,*) 'Bi-CGSTAB Itxation breakdown R',rho + Exit iteration + Endif + + If (it==1) Then + Call psb_axpby(one,r,zero,p,desc_a,info) + Else + beta = (rho/rho_old)*(alpha/omega) + Call psb_axpby(-omega,v,one,p,desc_a,info) + Call psb_axpby(one,r,beta,p,desc_a,info) + End If + + Call psb_prcaply(prec,p,f,desc_a,info,work=aux) + + Call psb_spmm(one,a,f,zero,v,desc_a,info,& + & work=aux) + + sigma = psb_dot(q,v,desc_a,info) + If (sigma==zero) Then + If (debug) Write(0,*) 'Bi-CGSTAB Iteration breakdown S1', sigma + Exit iteration + Endif + + alpha = rho/sigma + Call psb_axpby(one,r,zero,s,desc_a,info) + Call psb_axpby(-alpha,v,one,s,desc_a,info) + + Call psb_prcaply(prec,s,z,desc_a,info,work=aux) + + Call psb_spmm(one,a,z,zero,t,desc_a,info,& + & work=aux) + + sigma = psb_dot(t,t,desc_a,info) + If (sigma==zero) Then + If (debug) Write(0,*) 'BI-CGSTAB ITERATION BREAKDOWN S2', sigma + Exit iteration + Endif + + tau = psb_dot(t,s,desc_a,info) + omega = tau/sigma + + If (omega==zero) Then + If (debug) Write(0,*) 'BI-CGSTAB ITERATION BREAKDOWN O',omega + Exit iteration + Endif + + Call psb_axpby(alpha,f,one,x,desc_a,info) + Call psb_axpby(omega,z,one,x,desc_a,info) + Call psb_axpby(one,s,zero,r,desc_a,info) + Call psb_axpby(-omega,t,one,r,desc_a,info) + + If (listop == 1) Then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + If (itrac /= -1) Then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'bicgstab: ',itx,rerr,rni,bni,& + &xni,ani + Endif + + Else If (listop == 2) Then + rni = psb_nrm2(r,desc_a,info) + rerr = rni/bn2 + If (itrac /= -1) Then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4)))') 'bicgstab: ',itx,rerr,rni,bn2 + Endif + Endif + + If (rerr<=eps) Then + Exit restart + End If + + If (itx.Ge.itmax) Exit restart + End Do iteration + End Do restart + + If (Present(err)) err=rerr + If (Present(iter)) iter = itx + If (rerr>eps) Then + Write(0,*) 'BI-CGSTAB FAILED TO CONVERGE TO ',EPS,& + & ' IN ',ITX,' ITERATIONS ' + End If + + Deallocate(aux) + Call psb_free(wwrk,desc_a,info) + ! restore external global coherence behaviour + Call blacs_set(icontxt,16,isvch) +!!$ imerr = MPE_Log_event( istpe, 0, "ed CGSTAB" ) + if(info/=0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +End Subroutine psb_dcgstab + diff --git a/src/methd/psb_dcgstabl.f90 b/src/methd/psb_dcgstabl.f90 new file mode 100644 index 00000000..3f50c4b8 --- /dev/null +++ b/src/methd/psb_dcgstabl.f90 @@ -0,0 +1,396 @@ +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 C +!!$ C C +!!$ C C +!!$ C [5] G. Sleijpen, D. Fokkema C +!!$ C BICGSTAB(L) for linear equations involving unsymmetric C +!!$ C matrices with complex spectrum C +!!$ C Electronic Trans. on Numer. Analysis, Vol. 1, pp. 11-32, C +!!$ C Sep. 1993 C +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dcgstabl.f90 +! +! Subroutine: psb_dcgstabl +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_dcgstabl(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,irst,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + implicit none + +!!$ parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dprec_type), Intent(in) :: prec + Type(psb_desc_type), Intent(in) :: desc_a + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ local data + Real(Kind(1.d0)), Pointer :: aux(:),wwrk(:,:) + Real(Kind(1.d0)), Pointer :: ww(:), q(:), r(:), rt0(:), p(:), v(:), & + & s(:), t(:), z(:), f(:), uh(:,:), rh(:,:), & + & gamma(:), gamma1(:), gamma2(:), taum(:,:), sigma(:),& + &pv1(:), pv2(:), pm1(:,:), pm2(:,:) + Integer, Pointer :: iperm(:), ipnull(:), ipsave(:) + Real(Kind(1.d0)) ::rerr + Integer ::litmax, liter, naux, m, mglob, it, itrac,& + & nprows,npcols,me,mecol, n_row, n_col, nl, err_act + Character ::diagl, diagu + Logical, Parameter :: exchange=.True., noexchange=.False. + Integer, Parameter :: ione=1 + Integer, Parameter :: irmax = 8 + Integer :: itx, i, isvch, ich, icontxt,listop,j, int_err(5) + Logical :: do_renum_left + Real(Kind(1.d0)), Parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + Logical, Parameter :: debug = .False. + Real(Kind(1.d0)) :: alpha, beta, rho, rho_old, rni, xni, bni, ani,bn2,& + & omega, tau + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dcgstabl' + call psb_erractionsave(err_act) + + If (debug) Write(0,*) 'entering psb_dbicgstabl' + icontxt = desc_a%matrix_data(psb_ctxt_) + Call blacs_gridinfo(icontxt,nprows,npcols,me,mecol) + + If (debug) Write(0,*) 'psb_dbicgstabl: from gridinfo',nprows,npcols,me + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + if (present(istop)) then + listop = istop + else + listop = 1 + endif +! +! LISTOP = 1: Normwise backward error, infinity norm +! LISTOP = 2: ||r||/||b|| norm 2 +! + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_bicgstabl: invalid istop',listop + info=5001 + int_err=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + If (Present(itmax)) Then + litmax = itmax + Else + litmax = 1000 + Endif + + If (Present(itrace)) Then + itrac = itrace + Else + itrac = -1 + End If + + If (Present(irst)) Then + nl = irst + If (debug) Write(0,*) 'present: irst: ',irst,nl + Else + nl = 1 + If (debug) Write(0,*) 'not present: irst: ',irst,nl + Endif + + naux=4*n_col + Allocate(aux(naux),gamma(0:nl),gamma1(nl),& + &gamma2(nl),taum(nl,nl),sigma(nl), stat=info) + + If (info.Ne.0) Then + info=4000 + call psb_errpush(info,name) + goto 9999 + End If + Call psb_alloc(mglob,10,wwrk,desc_a,info) + Call psb_alloc(mglob,nl+1,uh,desc_a,info,js=0) + Call psb_alloc(mglob,nl+1,rh,desc_a,info,js=0) + Call psb_asb(wwrk,desc_a,info) + Call psb_asb(uh,desc_a,info) + Call psb_asb(rh,desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + q => wwrk(:,1) + r => wwrk(:,2) + p => wwrk(:,3) + v => wwrk(:,4) + f => wwrk(:,5) + s => wwrk(:,6) + t => wwrk(:,7) + z => wwrk(:,8) + ww => wwrk(:,9) + rt0 => wwrk(:,10) + + ! ensure global coherence for convergence checks. + Call blacs_get(icontxt,16,isvch) + ich = 1 + Call blacs_set(icontxt,16,ich) + + if (listop == 1) then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + else if (listop == 2) then + bn2 = psb_nrm2(b,desc_a,info) + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + diagl = 'u' + diagu = 'u' + itx = 0 + restart: Do +!!$ +!!$ r0 = b-ax0 +!!$ + If (debug) Write(0,*) 'restart: ',itx,it + If (itx.Ge.itmax) Exit restart + it = 0 + Call psb_axpby(one,b,zero,r,desc_a,info) + Call psb_spmm(-one,a,x,one,r,desc_a,info,work=aux) + + call psb_prcaply(prec,r,desc_a,info) + + Call psb_axpby(one,r,zero,rt0,desc_a,info) + Call psb_axpby(one,r,zero,rh(:,0),desc_a,info) + Call psb_axpby(zero,r,zero,uh(:,0),desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + rho = one + alpha = zero + omega = one + + If (debug) Write(0,*) 'on entry to amax: b: ',Size(b) + + if (listop == 1) then + rni = psb_amax(r,desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'bicgstab(l): ',& + & itx,rerr,rni,bni,xni,ani + endif + else if (listop == 2) then + rni = psb_nrm2(r,desc_a,info) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'bicgstab(l): ',& + & itx,rerr,rni,bn2 + endif + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + If (rerr<=eps) Then + Exit restart + End If + + iteration: Do + it = it + nl + itx = itx + nl + rho = -omega*rho + If (debug) Write(0,*) 'iteration: ',itx, rho,rh(1,0) + + Do j = 0, nl -1 + If (debug) Write(0,*) 'bicg part: ',j, nl + rho_old = rho + rho = psb_dot(rh(:,j),rt0,desc_a,info) + If (rho==zero) Then + If (debug) Write(0,*) 'bi-cgstab iteration breakdown r',rho + Exit iteration + Endif + beta = alpha*rho/rho_old + If (debug) Write(0,*) 'bicg part: ',alpha,beta,rho,rho_old + rho_old = rho + Call psb_axpby(one,rh(:,0:j),-beta,uh(:,0:j),desc_a,info) + If (debug) Write(0,*) 'bicg part: ',rh(1,0),beta + Call psb_spmm(one,a,uh(:,j),zero,uh(:,j+1),desc_a,info,work=aux) + + call psb_prcaply(prec,uh(:,j+1),desc_a,info) + + gamma(j) = psb_dot(uh(:,j+1),rt0,desc_a,info) + If (gamma(j)==zero) Then + If (debug) Write(0,*) 'bi-cgstab iteration breakdown s2',gamma(j) + Exit iteration + Endif + alpha = rho/gamma(j) + If (debug) Write(0,*) 'bicg part: alpha=r/g ',alpha,rho,gamma(j) + + Call psb_axpby(-alpha,uh(:,1:j+1),one,rh(:,0:j),desc_a,info) + Call psb_axpby(alpha,uh(:,0),one,x,desc_a,info) + Call psb_spmm(one,a,rh(:,j),zero,rh(:,j+1),desc_a,info,work=aux) + + call psb_prcaply(prec,rh(:,j+1),desc_a,info) + + Enddo + + Do j=1, nl + If (debug) Write(0,*) 'mod g-s part: ',j, nl,rh(1,0) + Do i=1, j-1 + taum(i,j) = psb_dot(rh(:,i),rh(:,j),desc_a,info) + taum(i,j) = taum(i,j)/sigma(i) + Call psb_axpby(-taum(i,j),rh(:,i),one,rh(:,j),desc_a,info) + Enddo + If (debug) Write(0,*) 'mod g-s part: dot prod ' + sigma(j) = psb_dot(rh(:,j),rh(:,j),desc_a,info) + gamma1(j) = psb_dot(rh(:,0),rh(:,j),desc_a,info) + If (debug) Write(0,*) 'mod g-s part: gamma1 ', & + &gamma1(j), sigma(j) + gamma1(j) = gamma1(j)/sigma(j) + Enddo + + gamma(nl) = gamma1(nl) + omega = gamma(nl) + + Do j=nl-1,1,-1 + gamma(j) = gamma1(j) + Do i=j+1,nl + gamma(j) = gamma(j) - taum(j,i) * gamma(i) + Enddo + Enddo + If (debug) Write(0,*) 'first solve: ', gamma(:) + + Do j=1,nl-1 + gamma2(j) = gamma(j+1) + Do i=j+1,nl-1 + gamma2(j) = gamma2(j) + taum(j,i) * gamma(i+1) + Enddo + Enddo + If (debug) Write(0,*) 'second solve: ', gamma(:) + + Call psb_axpby(gamma(1),rh(:,0),one,x,desc_a,info) + Call psb_axpby(-gamma1(nl),rh(:,nl),one,rh(:,0),desc_a,info) + Call psb_axpby(-gamma(nl),uh(:,nl),one,uh(:,0),desc_a,info) + + Do j=1, nl-1 + Call psb_axpby(-gamma(j),uh(:,j),one,uh(:,0),desc_a,info) + Call psb_axpby(gamma2(j),rh(:,j),one,x,desc_a,info) + Call psb_axpby(-gamma1(j),rh(:,j),one,rh(:,0),desc_a,info) + Enddo + + if (listop == 1) then + rni = psb_amax(rh(:,0),desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'bicgstab(l): ',& + & itx,rerr,rni,bni,xni,ani + endif + + else if (listop == 2) then + + rni = psb_nrm2(rh(:,0),desc_a,info) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'bicgstab(l): ',& + & itx,rerr,rni,bn2 + endif + endif + + If (rerr<=eps) Then + Exit restart + End If + + If (itx.Ge.itmax) Exit restart + End Do iteration + End Do restart + + If (Present(err)) err=rerr + If (Present(iter)) iter = itx + If (rerr>eps) Then + Write(0,*) 'bi-cgstabl failed to converge to ',eps,& + & ' in ',itx,' iterations ' + End If + + Deallocate(aux) + Call psb_free(wwrk,desc_a,info) + Call psb_free(uh,desc_a,info) + Call psb_free(rh,desc_a,info) + ! restore external global coherence behaviour + Call blacs_set(icontxt,16,isvch) + + if(info/=0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +End Subroutine psb_dcgstabl + + diff --git a/src/methd/psb_dgmresr.f90 b/src/methd/psb_dgmresr.f90 new file mode 100644 index 00000000..06b5512a --- /dev/null +++ b/src/methd/psb_dgmresr.f90 @@ -0,0 +1,356 @@ +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +!!$ C C +!!$ C References: C +!!$ C [1] Duff, I., Marrone, M., Radicati, G., and Vittoli, C. C +!!$ C Level 3 basic linear algebra subprograms for sparse C +!!$ C matrices: a user level interface C +!!$ C ACM Trans. Math. Softw., 23(3), 379-401, 1997. C +!!$ C C +!!$ C C +!!$ C [2] S. Filippone, M. Colajanni C +!!$ C PSBLAS: A library for parallel linear algebra C +!!$ C computation on sparse matrices C +!!$ C ACM Trans. on Math. Softw., 26(4), 527-550, Dec. 2000. C +!!$ C C +!!$ C [3] M. Arioli, I. Duff, M. Ruiz C +!!$ C Stopping criteria for iterative solvers C +!!$ C SIAM J. Matrix Anal. Appl., Vol. 13, pp. 138-144, 1992 C +!!$ C C +!!$ C C +!!$ C [4] R. Barrett et al C +!!$ C Templates for the solution of linear systems C +!!$ C SIAM, 1993 C +!!$ C C +!!$ C C +!!$ C [5] G. Sleijpen, D. Fokkema C +!!$ C BICGSTAB(L) for linear equations involving unsymmetric C +!!$ C matrices with complex spectrum C +!!$ C Electronic Trans. on Numer. Analysis, Vol. 1, pp. 11-32, C +!!$ C Sep. 1993 C +!!$ C C +!!$ C C +!!$ CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC +! File: psb_dgmresr.f90 +! +! Subroutine: psb_dgmres +! This subroutine implements the restarted GMRES method. +! +! Parameters: +! a - type(). The sparse matrix containing A. +! prec - type(). The data structure containing the preconditioner. +! b - real,dimension(:). The right hand side. +! x - real,dimension(:). The vector of unknowns. +! eps - real. The error tolerance. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! itmax - integer(optional). The maximum number of iterations. +! iter - integer(optional). The number of iterations performed. +! err - real(optional). The error on return. +! itrace - integer(optional). The unit to write messages onto. +! irst - integer(optional). The restart value. +! istop - integer(optional). The stopping criterium. +! +Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,irst,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_tools_mod + use psb_const_mod + use psb_prec_mod + use psb_error_mod + implicit none + +!!$ Parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dprec_type), Intent(in) :: prec + Type(psb_desc_type), Intent(in) :: desc_a + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err +!!$ local data + Real(Kind(1.d0)), Pointer :: aux(:),wwrk(:,:) + Real(Kind(1.d0)), Pointer :: w(:), q(:), r(:), rt0(:), p(:), v(:,:), & + & c(:),s(:), t(:), z(:), f(:), uh(:,:), h(:,:), rs(:),& + & gamma(:), gamma1(:), gamma2(:), taum(:,:), sigma(:),& + &pv1(:), pv2(:), pm1(:,:), rr(:,:) + Integer, Pointer :: iperm(:), ipnull(:), ipsave(:), ierrv(:) + Real(Kind(1.d0)) :: rerr, scal, gm + Integer ::litmax, liter, naux, m, mglob, it,k, itrac,& + & nprows,npcols,me,mecol, n_row, n_col, nl, int_err(5) + Character ::diagl, diagu + Logical, Parameter :: exchange=.True., noexchange=.False. + Integer, Parameter :: ione=1 + Integer, Parameter :: irmax = 8 + Integer :: itx, i, isvch, ich, icontxt,listop, err_act + Logical :: do_renum_left,inner_stop + Real(Kind(1.d0)), Parameter :: one=1.d0, zero=0.d0, epstol=1.d-35 + Logical, Parameter :: debug = .false. + Real(Kind(1.d0)) :: alpha, beta, rho, rho_old, rni, xni, bni, ani,bn2,& + & omega, tau + real(kind(1.d0)), external :: dnrm2 + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dgmres' + call psb_erractionsave(err_act) + + If (debug) Write(0,*) 'entering psb_dgmres' + icontxt = desc_a%matrix_data(psb_ctxt_) + Call blacs_gridinfo(icontxt,nprows,npcols,me,mecol) + + If (debug) Write(0,*) 'psb_dgmres: from gridinfo',nprows,npcols,me + + mglob = desc_a%matrix_data(m_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + if (present(istop)) then + listop = istop + else + listop = 1 + endif +! +! LISTOP = 1: Normwise backward error, infinity norm +! LISTOP = 2: ||r||/||b|| norm 2 +! + + if ((listop < 1 ).or.(listop > 2 ) ) then + write(0,*) 'psb_dgmres: invalid istop',listop + info=5001 + int_err(1)=listop + err=info + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + If (Present(itmax)) Then + litmax = itmax + Else + litmax = 1000 + Endif + + If (Present(itrace)) Then + itrac = itrace + Else + itrac = -1 + End If + + If (Present(irst)) Then + nl = irst + If (debug) Write(0,*) 'present: irst: ',irst,nl + Else + nl = 10 + If (debug) Write(0,*) 'not present: irst: ',irst,nl + Endif + + + naux=4*n_col + Allocate(aux(naux),h(nl+1,nl+1),rr(nl+1,nl+1),& + &c(nl+1),s(nl+1),rs(nl+1), stat=info) + + If (info.Ne.0) Then + info = 4000 + call psb_errpush(info,name) + goto 9999 + End If + + Call psb_dsall(mglob,nl+1,v,desc_a,info) + Call psb_dsall(mglob,w,desc_a,info) + Call psb_dsasb(v,desc_a,info) + Call psb_dsasb(w,desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + ! ensure global coherence for convergence checks. + Call blacs_get(icontxt,16,isvch) + ich = 1 + Call blacs_set(icontxt,16,ich) + + if (listop == 1) then + ani = psb_nrmi(a,desc_a,info) + bni = psb_amax(b,desc_a,info) + else if (listop == 2) then + bn2 = psb_nrm2(b,desc_a,info) + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + diagl = 'u' + diagu = 'u' + itx = 0 + restart: Do +!!$ +!!$ r0 = b-ax0 +!!$ + If (debug) Write(0,*) 'restart: ',itx,it + it = 0 + Call psb_axpby(one,b,zero,v(:,1),desc_a,info) + Call psb_spmm(-one,a,x,one,v(:,1),desc_a,info,work=aux) + + call psb_prcaply(prec,v(:,1),desc_a,info) + rs(1) = psb_nrm2(v(:,1),desc_a,info) + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + scal=one/rs(1) + If (debug) Write(0,*) 'on entry to amax: b: ',Size(b),rs(1),scal + + if (listop == 1) then + rni = psb_amax(v(:,1),desc_a,info) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'gmresr(l): ',& + & itx,rerr,rni,bni,xni,ani + endif + else if (listop == 2) then + rni = psb_nrm2(v(:,1),desc_a,info) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'gmresr(l): ',& + & itx,rerr,rni,bn2 + endif + endif + if (info.ne.0) Then + info=4011 + call psb_errpush(info,name) + goto 9999 + End If + + If (rerr<=eps) Then + Exit restart + End If + + If (itx.Ge.itmax) Exit restart + + v(:,1) = v(:,1) * scal + + inner: Do i=1,nl + itx = itx + 1 + + Call psb_spmm(one,a,v(:,i),zero,w,desc_a,info,work=aux) + call psb_prcaply(prec,w,desc_a,info) + + do k = 1, i + h(k,i) = psb_dot(v(:,k),w,desc_a,info) + call psb_axpby(-h(k,i),v(:,k),one,w,desc_a,info) + end do + h(i+1,i) = psb_nrm2(w,desc_a,info) + scal=one/h(i+1,i) + call psb_axpby(scal,w,zero,v(:,i+1),desc_a,info) + do k=2,i + rr(k-1,i) = c(k-1)*h(k-1,i) + s(k-1)*h(k,i) + rr(k,i) = -s(k-1)*h(k-1,i) + c(k-1)*h(k,i) + enddo + gm = safe_dn2(h(i,i),h(i+1,i)) + if (debug) write(0,*) 'GM : ',gm + gm = max(gm,epstol) + + c(i) = h(i,i)/gm + s(i) = h(i+1,i)/gm + rs(i+1) = -s(i)*rs(i) + rs(i) = c(i)*rs(i) + rr(i,i) = c(i)*h(i,i)+s(i)*h(i+1,i) + + if (listop == 1) then + rni = abs(rs(i+1)) + xni = psb_amax(x,desc_a,info) + rerr = rni/(ani*xni+bni) + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,5(2x,es10.4))') 'gmresr(l): ',& + & itx,rerr,rni,bni,xni,ani + endif + else if (listop == 2) then + rni = abs(rs(i+1)) + rerr = rni/bn2 + if (itrac /= -1) then + If (me == 0) Write(itrac,'(a,i4,3(2x,es10.4))') 'gmresr(l): ',& + & itx,rerr,rni,bn2 + endif + endif + + if (rerr < eps ) then + call dtrsm('l','u','n','n',i,1,one,rr,size(rr,1),rs,nl) + if (debug) write(0,*) 'Rebuild x-> RS:',rs(21:nl) + do k=1, i + call psb_axpby(rs(k),v(:,k),one,x,desc_a,info) + end do + exit restart + end if + + end Do inner + if (debug) write(0,*) 'Before DTRSM :',rs(1:nl) + call dtrsm('l','u','n','n',nl,1,one,rr,size(rr,1),rs,nl) + if (debug) write(0,*) 'Rebuild x-> RS:',rs(21:nl) + do k=1, nl + call psb_axpby(rs(k),v(:,k),one,x,desc_a,info) + end do + + End Do restart + + If (Present(err)) err=rerr + If (Present(iter)) iter = itx + If (rerr>eps) Then + Write(0,*) 'gmresr(l) failed to converge to ',eps,& + & ' in ',itx,' iterations ' + End If + + + Deallocate(aux,h,c,s,rs,rr, stat=info) + Call psb_free(v,desc_a,info) + Call psb_free(w,desc_a,info) + ! restore external global coherence behaviour + Call blacs_set(icontxt,16,isvch) + + if (info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + +contains + function safe_dn2(a,b) + real(kind(1.d0)), intent(in) :: a, b + real(kind(1.d0)) :: safe_dn2 + real(kind(1.d0)) :: t + + t = max(abs(a),abs(b)) + if (t==0.d0) then + safe_dn2 = 0.d0 + else + safe_dn2 = t * sqrt(abs(a/t)**2 + abs(b/t)**2) + endif + return + end function safe_dn2 + + +End Subroutine psb_dgmresr + + diff --git a/src/modules/Makefile b/src/modules/Makefile new file mode 100644 index 00000000..09b6ff58 --- /dev/null +++ b/src/modules/Makefile @@ -0,0 +1,22 @@ +include ../../Make.inc + +MODULES = psb_realloc_mod.o psb_string_mod.o psb_spmat_type.o \ + psb_desc_type.o \ + psb_blacs_mod.o psb_serial_mod.o psb_tools_mod.o \ + psb_prec_type.o psb_error_mod.o psb_prec_mod.o \ + psb_methd_mod.o psb_const_mod.o \ + psb_comm_mod.o psb_psblas_mod.o psi_mod.o + +OBJS = error.o parts.o + +INCDIRS = ../../lib + +psb_realloc_mod.o : psb_error_mod.o +psb_spmat_type.o : psb_realloc_mod.o psb_const_mod.o + +lib: $(MODULES) $(OBJS) + cp *$(.mod) ./psb_const.fh ../../lib + + +clean: + /bin/rm -f $(MODULES) $(OBJS) *$(.mod) diff --git a/src/modules/TODO b/src/modules/TODO new file mode 100644 index 00000000..80bef6a5 --- /dev/null +++ b/src/modules/TODO @@ -0,0 +1,3 @@ +1- psb_methd_mod: sistemare tutto +2- psb_prec_mod : sistemare tutto +3- psb_prec_type: sistemare tutto diff --git a/src/modules/error.f90 b/src/modules/error.f90 new file mode 100644 index 00000000..5e9eb10d --- /dev/null +++ b/src/modules/error.f90 @@ -0,0 +1,130 @@ +! +! Wrapper subroutines to provide error tools to F77 and C code +! + +subroutine FCpsb_errcomm(icontxt, err) + use psb_error_mod + integer, intent(in) :: icontxt + integer, intent(inout):: err + + call psb_errcomm(icontxt, err) + +end subroutine FCpsb_errcomm + +subroutine FCpsb_errpush(err_c, r_name, i_err) + use psb_error_mod + implicit none + + integer, intent(in) :: err_c + character(len=20), intent(in) :: r_name + integer :: i_err(5) + + call psb_errpush(err_c, r_name, i_err) + +end subroutine FCpsb_errpush + + + +subroutine FCpsb_serror() + use psb_error_mod + implicit none + + call psb_error() + +end subroutine FCpsb_serror + + + + + +subroutine FCpsb_perror(icontxt) + use psb_error_mod + implicit none + + integer, intent(in) :: icontxt + + call psb_error(icontxt) + +end subroutine FCpsb_perror + + + + + +subroutine FCpsb_get_errstatus(s) + use psb_error_mod + implicit none + + integer, intent(out) :: s + + call psb_get_errstatus(s) + +end subroutine FCpsb_get_errstatus + + + + + +subroutine FCpsb_get_errverbosity(v) + use psb_error_mod + implicit none + + integer, intent(out) :: v + + call psb_get_errverbosity(v) + +end subroutine FCpsb_get_errverbosity + + + + +subroutine FCpsb_set_errverbosity(v) + use psb_error_mod + implicit none + + integer, intent(inout) :: v + + call psb_set_errverbosity(v) + +end subroutine FCpsb_set_errverbosity + + + + + +subroutine FCpsb_erractionsave(err_act) + use psb_error_mod + implicit none + + integer, intent(out) :: err_act + + call psb_erractionsave(err_act) + +end subroutine FCpsb_erractionsave + + +subroutine FCpsb_get_erraction(err_act) + use psb_error_mod + implicit none + integer, intent(out) :: err_act + + call psb_get_erraction(err_act) +end subroutine FCpsb_get_erraction + + + +subroutine FCpsb_erractionrestore(err_act) + use psb_error_mod + implicit none + + integer, intent(in) :: err_act + + call psb_erractionrestore(err_act) + +end subroutine FCpsb_erractionrestore + + + + + + diff --git a/src/modules/parts.f90 b/src/modules/parts.f90 new file mode 100644 index 00000000..3ff3037a --- /dev/null +++ b/src/modules/parts.f90 @@ -0,0 +1,8 @@ +module psb_parts_mod + interface + subroutine psb_parts(glob_index,nrow,np,pv,nv) + integer, intent (in) :: glob_index,np,nrow + integer, intent (out) :: nv, pv(*) + end subroutine psb_parts + end interface +end module psb_parts_mod diff --git a/src/modules/psb_blacs_mod.f90 b/src/modules/psb_blacs_mod.f90 new file mode 100644 index 00000000..8c50bb03 --- /dev/null +++ b/src/modules/psb_blacs_mod.f90 @@ -0,0 +1,2735 @@ +module f90blacs + + interface gebs2d + module procedure igebs2ds, igebs2dv, igebs2dm,& + & dgebs2ds, dgebs2dv, dgebs2dm,& + & zgebs2ds, zgebs2dv, zgebs2dm + end interface + + interface gebr2d + module procedure igebr2ds, igebr2dv, igebr2dm,& + & dgebr2ds, dgebr2dv, dgebr2dm,& + & zgebr2ds, zgebr2dv, zgebr2dm + end interface + + + interface gesd2d + module procedure igesd2ds, igesd2dv, igesd2dm,& + & dgesd2ds, dgesd2dv, dgesd2dm,& + & zgesd2ds, zgesd2dv, zgesd2dm + end interface + + interface gerv2d + module procedure igerv2ds, igerv2dv, igerv2dm,& + & dgerv2ds, dgerv2dv, dgerv2dm,& + & zgerv2ds, zgerv2dv, zgerv2dm + end interface + + interface gsum2d + module procedure igsum2ds, igsum2dv, igsum2dm,& + & dgsum2ds, dgsum2dv, dgsum2dm,& + & zgsum2ds, zgsum2dv, zgsum2dm + end interface + + interface gamx2d + module procedure igamx2ds, igamx2dv, igamx2dm,& + & dgamx2ds, dgamx2dv, dgamx2dm,& + & zgamx2ds, zgamx2dv, zgamx2dm + end interface + + + interface gamn2d + module procedure igamn2ds, igamn2dv, igamn2dm,& + & dgamn2ds, dgamn2dv, dgamn2dm,& + & zgamn2ds, zgamn2dv, zgamn2dm + end interface + + +contains + + subroutine igebs2ds(icontxt,scope,dat,top) + integer, intent(in) :: icontxt,dat + character, intent(in) :: scope + character, intent(in), optional :: top + + character :: top_ + + interface + subroutine igebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v + character, intent(in) :: scope, top + end subroutine igebs2d + end interface + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call igebs2d(icontxt,scope,top_,1,1,dat,1) + + end subroutine igebs2ds + + subroutine igebs2dv(icontxt,scope,dat,top) + integer, intent(in) :: icontxt,dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + + + interface + subroutine igebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v(*) + character, intent(in) :: scope, top + end subroutine igebs2d + end interface + + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call igebs2d(icontxt,scope,top_,size(dat,1),1,dat,size(dat,1)) + + end subroutine igebs2dv + + subroutine igebs2dm(icontxt,scope,dat,top) + integer, intent(in) :: icontxt,dat(:,:) + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine igebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v(ld,*) + character, intent(in) :: scope, top + end subroutine igebs2d + end interface + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call igebs2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1)) + + end subroutine igebs2dm + + + + subroutine dgebs2ds(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine dgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v + character, intent(in) :: scope, top + end subroutine dgebs2d + end interface + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call dgebs2d(icontxt,scope,top_,1,1,dat,1) + + end subroutine dgebs2ds + + subroutine dgebs2dv(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine dgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v(*) + character, intent(in) :: scope, top + end subroutine dgebs2d + end interface + + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call dgebs2d(icontxt,scope,top_,size(dat),1,dat,size(dat)) + + end subroutine dgebs2dv + + subroutine dgebs2dm(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine dgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v(ld,*) + character, intent(in) :: scope, top + end subroutine dgebs2d + end interface + + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call dgebs2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1)) + + end subroutine dgebs2dm + + + + subroutine zgebs2ds(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine zgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v + character, intent(in) :: scope, top + end subroutine zgebs2d + end interface + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call zgebs2d(icontxt,scope,top_,1,1,dat,1) + + end subroutine zgebs2ds + + subroutine zgebs2dv(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine zgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v(*) + character, intent(in) :: scope, top + end subroutine zgebs2d + end interface + + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call zgebs2d(icontxt,scope,top_,size(dat),1,dat,size(dat)) + + end subroutine zgebs2dv + + subroutine zgebs2dm(icontxt,scope,dat,top) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + + interface + subroutine zgebs2d(icontxt,scope,top,m,n,v,ld) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v(ld,*) + character, intent(in) :: scope, top + end subroutine zgebs2d + end interface + + character :: top_ + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + + call zgebs2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1)) + + end subroutine zgebs2dm + + + + + + subroutine dgebr2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgebr2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgebr2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine dgebr2ds + + subroutine dgebr2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgebr2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine dgebr2dv + + subroutine dgebr2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgebr2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine dgebr2dm + + + + + subroutine zgebr2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgebr2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgebr2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine zgebr2ds + + subroutine zgebr2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgebr2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine zgebr2dv + + subroutine zgebr2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgebr2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine zgebr2dm + + + + subroutine igebr2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igebr2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igebr2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine igebr2ds + + subroutine igebr2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igebr2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine igebr2dv + + subroutine igebr2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igebr2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igebr2d + end interface + + character :: top_ + integer :: nrows,ncols,myrow,mycol + integer :: rrt_, crt_ + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = 0 + case('C','c') + rrt_ = 0 + crt_ = mycol + case('A','a') + rrt_ = 0 + crt_ = 0 + case default + rrt_ = 0 + crt_ = 0 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igebr2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine igebr2dm + + + + subroutine dgesd2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine dgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v + integer, intent(in) :: rd,cd + end subroutine dgesd2d + end interface + + call dgesd2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine dgesd2ds + + + subroutine dgesd2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine dgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v(*) + integer, intent(in) :: rd,cd + end subroutine dgesd2d + end interface + + call dgesd2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine dgesd2dv + + subroutine dgesd2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(in) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine dgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(in) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine dgesd2d + end interface + + call dgesd2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine dgesd2dm + + + subroutine igesd2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(in) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine igesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v + integer, intent(in) :: rd,cd + end subroutine igesd2d + end interface + + call igesd2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine igesd2ds + + + subroutine igesd2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(in) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine igesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v(*) + integer, intent(in) :: rd,cd + end subroutine igesd2d + end interface + + call igesd2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine igesd2dv + + subroutine igesd2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(in) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine igesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(in) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine igesd2d + end interface + + call igesd2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine igesd2dm + + + + subroutine zgesd2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine zgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v + integer, intent(in) :: rd,cd + end subroutine zgesd2d + end interface + + call zgesd2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine zgesd2ds + + + subroutine zgesd2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine zgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v(*) + integer, intent(in) :: rd,cd + end subroutine zgesd2d + end interface + + call zgesd2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine zgesd2dv + + subroutine zgesd2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(in) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine zgesd2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(in) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine zgesd2d + end interface + + call zgesd2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine zgesd2dm + + + + subroutine dgerv2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine dgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v + integer, intent(in) :: rd,cd + end subroutine dgerv2d + end interface + + call dgerv2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine dgerv2ds + + + subroutine dgerv2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine dgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(*) + integer, intent(in) :: rd,cd + end subroutine dgerv2d + end interface + + call dgerv2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine dgerv2dv + + subroutine dgerv2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine dgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine dgerv2d + end interface + + call dgerv2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine dgerv2dm + + + subroutine igerv2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine igerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v + integer, intent(in) :: rd,cd + end subroutine igerv2d + end interface + + call igerv2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine igerv2ds + + + subroutine igerv2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine igerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(*) + integer, intent(in) :: rd,cd + end subroutine igerv2d + end interface + + call igerv2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine igerv2dv + + subroutine igerv2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine igerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine igerv2d + end interface + + call igerv2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine igerv2dm + + + + subroutine zgerv2ds(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat + integer, intent(in) :: rdst,cdst + + interface + subroutine zgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v + integer, intent(in) :: rd,cd + end subroutine zgerv2d + end interface + + call zgerv2d(icontxt,1,1,dat,1,rdst,cdst) + + end subroutine zgerv2ds + + + subroutine zgerv2dv(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:) + integer, intent(in) :: rdst,cdst + + interface + subroutine zgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(*) + integer, intent(in) :: rd,cd + end subroutine zgerv2d + end interface + + call zgerv2d(icontxt,size(dat),1,dat,size(dat),rdst,cdst) + + end subroutine zgerv2dv + + subroutine zgerv2dm(icontxt,dat,rdst,cdst) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:,:) + integer, intent(in) :: rdst,cdst + + interface + subroutine zgerv2d(icontxt,m,n,v,ld,rd,cd) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(in) :: rd,cd + end subroutine zgerv2d + end interface + + call zgerv2d(icontxt,size(dat,1),size(dat,2),dat,size(dat,1),rdst,cdst) + + end subroutine zgerv2dm + + + + subroutine dgsum2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgsum2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgsum2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine dgsum2ds + + subroutine dgsum2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgsum2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine dgsum2dv + + subroutine dgsum2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call dgsum2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine dgsum2dm + + + + subroutine igsum2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igsum2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igsum2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine igsum2ds + + subroutine igsum2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igsum2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine igsum2dv + + subroutine igsum2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call igsum2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine igsum2dm + + + + subroutine zgsum2ds(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgsum2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgsum2d(icontxt,scope,top_,1,1,dat,1,rrt_,crt_) + + end subroutine zgsum2ds + + subroutine zgsum2dv(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgsum2d(icontxt,scope,top_,size(dat),1,dat,size(dat),rrt_,crt_) + + end subroutine zgsum2dv + + subroutine zgsum2dm(icontxt,scope,dat,top,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:,:) + + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgsum2d(icontxt,scope,top,m,n,v,ld,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(ld,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgsum2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + call zgsum2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),rrt_,crt_) + + end subroutine zgsum2dm + + + + + subroutine dgamx2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine dgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine dgamx2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call dgamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call dgamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine dgamx2ds + + + subroutine dgamx2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine dgamx2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call dgamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call dgamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine dgamx2dv + + subroutine dgamx2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + real(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgamx2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call dgamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call dgamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine dgamx2dm + + + + subroutine igamx2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine igamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine igamx2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call igamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call igamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine igamx2ds + + + subroutine igamx2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine igamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine igamx2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call igamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call igamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine igamx2dv + + subroutine igamx2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + integer, intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igamx2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call igamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call igamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine igamx2dm + + + + subroutine zgamx2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine zgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine zgamx2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call zgamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call zgamx2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine zgamx2ds + + + subroutine zgamx2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine zgamx2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call zgamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call zgamx2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine zgamx2dv + + subroutine zgamx2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgamx2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + complex(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgamx2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call zgamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call zgamx2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine zgamx2dm + + + subroutine dgamn2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine dgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine dgamn2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call dgamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call dgamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine dgamn2ds + + + subroutine dgamn2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + real(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine dgamn2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call dgamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call dgamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine dgamn2dv + + subroutine dgamn2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + real(kind(1.d0)), intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine dgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + real(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine dgamn2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call dgamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call dgamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine dgamn2dm + + + + subroutine igamn2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine igamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine igamn2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call igamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call igamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine igamn2ds + + + subroutine igamn2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine igamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + integer, intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine igamn2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call igamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call igamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine igamn2dv + + subroutine igamn2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + integer, intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine igamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + integer, intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine igamn2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call igamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call igamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine igamn2dm + + + + subroutine zgamn2ds(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + integer, intent(inout), optional :: ria,cia + + interface + subroutine zgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine zgamn2d + end interface + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1),cia_(1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).or.present(cia)) then + call zgamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,1,rrt_,crt_) + if (present(ria)) ria=ria_(1) + if (present(cia)) cia=cia_(1) + else + call zgamn2d(icontxt,scope,top_,1,1,dat,1,ria_,cia_,-1,rrt_,crt_) + endif + end subroutine zgamn2ds + + + subroutine zgamn2dv(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:) + character, intent(in) :: scope + character, intent(in), optional :: top + integer, intent(inout), optional :: ria(:),cia(:) + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld + complex(kind(1.d0)), intent(inout) :: v(*) + character, intent(in) :: scope, top + integer, intent(inout) :: ria(*),cia(*) + integer, intent(in) :: rrt,crt,ldia + end subroutine zgamn2d + end interface + + integer :: ldia_,ria_(1),cia_(1) + character :: top_ + integer :: rrt_, crt_ + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call zgamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria,cia,min(size(ria),size(cia)),rrt_,crt_) + else + ldia_ = -1 + call zgamn2d(icontxt,scope,top_,size(dat),1,dat,size(dat),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine zgamn2dv + + subroutine zgamn2dm(icontxt,scope,dat,top,ria,cia,rrt,crt) + integer, intent(in) :: icontxt + complex(kind(1.d0)), intent(inout) :: dat(:,:) + character, intent(in) :: scope + integer, intent(inout), optional :: ria(:,:),cia(:,:) + character, intent(in), optional :: top + integer, intent(in), optional :: rrt,crt + + interface + subroutine zgamn2d(icontxt,scope,top,m,n,v,ld,ria,cia,ldia,rrt,crt) + integer, intent(in) :: icontxt,m,n,ld,ldia + complex(kind(1.d0)), intent(inout) :: v(ld,*) + integer, intent(inout) :: ria(ldia,*),cia(ldia,*) + character, intent(in) :: scope, top + integer, intent(in) :: rrt,crt + end subroutine zgamn2d + end interface + + character :: top_ + integer :: rrt_, crt_ + integer :: ldia_,ria_(1,1),cia_(1,1) + integer :: nrows,ncols,myrow,mycol + + + call blacs_gridinfo(icontxt,nrows,ncols,myrow,mycol) + select case(scope) + case('R','r') + rrt_ = myrow + crt_ = -1 + case('C','c') + rrt_ = -1 + crt_ = mycol + case('A','a') + rrt_ = -1 + crt_ = -1 + case default + rrt_ = -1 + crt_ = -1 + end select + + + if (present(top)) then + top_ = top + else + top_ = ' ' + endif + if (present(rrt)) then + rrt_ = rrt + endif + if (present(crt)) then + crt_ = crt + endif + + if (present(ria).and.present(cia)) then + call zgamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria,cia,min(size(ria,1),size(cia,1)),rrt_,crt_) + else + ldia_ = -1 + call zgamn2d(icontxt,scope,top_,size(dat,1),size(dat,2),dat,size(dat,1),& + & ria_,cia_,ldia_,rrt_,crt_) + end if + + end subroutine zgamn2dm + + +end module f90blacs diff --git a/src/modules/psb_comm_mod.f90 b/src/modules/psb_comm_mod.f90 new file mode 100644 index 00000000..d2742f61 --- /dev/null +++ b/src/modules/psb_comm_mod.f90 @@ -0,0 +1,110 @@ +module psb_comm_mod + + interface psb_ovrl + subroutine psb_dovrlm(x,desc_a,info,jx,ik,work,choice,update_type) + use psb_descriptor_type + real(kind(1.d0)), intent(inout) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional :: work(:) + logical, intent(in), optional :: choice + integer, intent(in), optional :: update_type,jx,ik + end subroutine psb_dovrlm + subroutine psb_dovrlv(x,desc_a,info,work,choice,update_type) + use psb_descriptor_type + real(kind(1.d0)), intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional :: work(:) + logical, intent(in), optional :: choice + integer, intent(in), optional :: update_type + end subroutine psb_dovrlv + end interface + + interface psb_halo + subroutine psb_dhalom(x,desc_a,info,alpha,jx,ik,work,tran,mode) + use psb_descriptor_type + real(kind(1.d0)), intent(inout) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + real(kind(1.d0)), intent(inout), optional :: work(:) + integer, intent(in), optional :: mode,jx,ik + character, intent(in), optional :: tran + end subroutine psb_dhalom + subroutine psb_dhalov(x,desc_a,info,alpha,work,tran,mode) + use psb_descriptor_type + real(kind(1.d0)), intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + real(kind(1.d0)), intent(inout), optional :: work(:) + integer, intent(in), optional :: mode + character, intent(in), optional :: tran + end subroutine psb_dhalov + subroutine psb_ihalom(x,desc_a,info,alpha,jx,ik,work,tran,mode) + use psb_descriptor_type + integer, intent(inout) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + integer, intent(inout), optional :: work(:) + integer, intent(in), optional :: mode,jx,ik + character, intent(in), optional :: tran + end subroutine psb_ihalom + subroutine psb_ihalov(x,desc_a,info,alpha,work,tran,mode) + use psb_descriptor_type + integer, intent(inout) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: alpha + integer, intent(inout), optional :: work(:) + integer, intent(in), optional :: mode + character, intent(in), optional :: tran + end subroutine psb_ihalov + end interface + + + interface psb_dscatter + subroutine psb_dscatterm(globx, locx, desc_a, info, iroot,& + & iiglobx, ijglobx, iilocx,ijlocx,ik) + use psb_descriptor_type + real(kind(1.d0)), intent(out) :: locx(:,:) + real(kind(1.d0)), intent(in) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot,iiglobx,& + & ijglobx,iilocx,ijlocx,ik + end subroutine psb_dscatterm + subroutine psb_dscatterv(globx, locx, desc_a, info, iroot) + use psb_descriptor_type + real(kind(1.d0)), intent(out) :: locx(:) + real(kind(1.d0)), intent(in) :: globx(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot + end subroutine psb_dscatterv + end interface + + interface psb_dgather + subroutine psb_dgatherm(globx, locx, desc_a, info, iroot,& + & iiglobx, ijglobx, iilocx,ijlocx,ik) + use psb_descriptor_type + real(kind(1.d0)), intent(in) :: locx(:,:) + real(kind(1.d0)), intent(out) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot, iiglobx, ijglobx, iilocx, ijlocx, ik + end subroutine psb_dgatherm + subroutine psb_dgatherv(globx, locx, desc_a, info, iroot,& + & iiglobx, iilocx) + use psb_descriptor_type + real(kind(1.d0)), intent(in) :: locx(:,:) + real(kind(1.d0)), intent(out) :: globx(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in), optional :: iroot, iiglobx, iilocx + end subroutine psb_dgatherv + end interface + +end module psb_comm_mod diff --git a/src/modules/psb_const.fh b/src/modules/psb_const.fh new file mode 100644 index 00000000..71015db6 --- /dev/null +++ b/src/modules/psb_const.fh @@ -0,0 +1,38 @@ + integer, parameter :: psb_nohalo_=0, psb_halo_=4 + integer, parameter :: psb_none_=0,psb_sum_=1 + integer, parameter :: psb_avg_=2,psb_square_root_=3 + integer, parameter :: psb_swap_send_=1,psb_swap_recv_=2 + integer, parameter :: psb_swap_sync_=4,psb_swap_mpi_=8 + integer, parameter :: psb_deadlock_check_=0 + integer, parameter :: psb_local_mtrx_check_=1 + integer, parameter :: psb_local_comm_check_=2 + integer, parameter :: psb_consistency_check_=3 + integer, parameter :: psb_global_check_=4 + integer, parameter :: psb_order_communication_=5 + integer, parameter :: psb_change_represent_=6 + integer, parameter :: psb_loc_to_glob_check_=7 + integer, parameter :: psb_convert_halo_=1 + integer, parameter :: psb_convert_ovrlap_=2 + integer, parameter :: psb_act_ret_=0 + integer, parameter :: psb_act_abort_=1, no_err_=0 + integer, parameter :: psb_dec_type_=1,psb_m_=2,psb_n_=3 + integer, parameter :: psb_n_row_=4,psb_n_col_=5,psb_ctxt_=6 + integer, parameter :: psb_loc_to_glob_=7 + integer, parameter :: psb_mpi_c_=9,psb_mdata_size_=10 + integer, parameter :: psb_desc_asb_=3099 + integer, parameter :: psb_desc_bld_=psb_desc_asb_+1 + integer, parameter :: psb_desc_upd_=psb_desc_bld_+1 + integer, parameter :: psb_desc_upd_asb_=psb_desc_upd_+1 + integer, parameter :: psb_upd_glb_=998,psb_upd_loc_=997 + integer, parameter :: psb_proc_id_=0,psb_n_elem_recv_=1 + integer, parameter :: psb_elem_recv_=2,psb_n_elem_send_=2 + integer, parameter :: psb_elem_send_=3,psb_n_ovrlp_elem_=1 + integer, parameter :: psb_ovrlp_elem_to_=2,psb_ovrlp_elem_=0 + integer, parameter :: psb_nnz_=1, psb_n_dom_ovr_=1 + integer, parameter :: psb_no_comm_=-1, psb_nzsizereq_=3 + integer, parameter :: ione=1, done=1.d0,izero=0, dzero=0.d0 + integer, parameter :: itwo=2, ithree=3,root=0, act_abort=1 + integer, parameter :: psb_nztotreq_=1,psb_nzrowreq_=2 + character, parameter :: psb_all_='A',psb_topdef_=' ' + + diff --git a/src/modules/psb_const_mod.f90 b/src/modules/psb_const_mod.f90 new file mode 100644 index 00000000..3b14144d --- /dev/null +++ b/src/modules/psb_const_mod.f90 @@ -0,0 +1,36 @@ +module psb_const_mod + + integer, parameter :: psb_nohalo_=0, psb_halo_=4 + integer, parameter :: psb_none_=0, psb_sum_=1 + integer, parameter :: psb_avg_=2, psb_square_root_=3 + integer, parameter :: psb_swap_send_=1, psb_swap_recv_=2 + integer, parameter :: psb_swap_sync_=4, psb_swap_mpi_=8 + integer, parameter :: psb_deadlock_check_=0, psb_local_mtrx_check_=1 + integer, parameter :: psb_local_comm_check_=2, psb_consistency_check_=3 + integer, parameter :: psb_global_check_=4, psb_order_communication_=5 + integer, parameter :: psb_change_represent_=6, psb_loc_to_glob_check_=7 + integer, parameter :: psb_convert_halo_=1, psb_convert_ovrlap_=2 + integer, parameter :: psb_act_ret_=0, psb_act_abort_=1, no_err_=0 + integer, parameter :: psb_dec_type_=1, psb_m_=2,psb_n_=3 + integer, parameter :: psb_n_row_=4, psb_n_col_=5,psb_ctxt_=6 + integer, parameter :: psb_loc_to_glob_=7, psb_mpi_c_=9,psb_mdata_size_=10 + integer, parameter :: psb_desc_asb_=3099, psb_desc_bld_=psb_desc_asb_+1 + integer, parameter :: psb_desc_upd_=psb_desc_bld_+1, psb_desc_upd_asb_=psb_desc_upd_+1 + integer, parameter :: psb_upd_glb_=998, psb_upd_loc_=997 + integer, parameter :: psb_proc_id_=0, psb_n_elem_recv_=1 + integer, parameter :: psb_elem_recv_=2, psb_n_elem_send_=2 + integer, parameter :: psb_elem_send_=3, psb_n_ovrlp_elem_=1 + integer, parameter :: psb_ovrlp_elem_to_=2, psb_ovrlp_elem_=0, psb_n_dom_ovr_=1 + integer, parameter :: psb_nnz_=1 + integer, parameter :: psb_no_comm_=-1 + integer, parameter :: ione=1, done=1.d0, izero=0, dzero=0.d0 + integer, parameter :: itwo=2, ithree=3, root=0 + integer, parameter :: psb_nztotreq_=1, psb_nzrowreq_=2, psb_nzsizereq_=3 + + real(kind(1.d0)), parameter :: psb_colrow_=0.33 + + character, parameter :: psb_all_='A', psb_topdef_=' ' + + + +end module psb_const_mod diff --git a/src/modules/psb_desc_type.f90 b/src/modules/psb_desc_type.f90 new file mode 100644 index 00000000..1000c652 --- /dev/null +++ b/src/modules/psb_desc_type.f90 @@ -0,0 +1,82 @@ +! +! Module to define desc_a, +! structure for coomunications. +! +! Typedef: psb_desc_type +! Defines a communication descriptor + + +module psb_descriptor_type + use psb_const_mod + + ! desc_type contains data for communications. + type psb_desc_type + ! contain decomposition informations + integer, pointer :: matrix_data(:)=>null() + ! contain index of halo elements to send/receive + integer, pointer :: halo_index(:)=>null() + ! contain indices of boundary elements + integer, pointer :: bnd_elem(:)=>null() + ! contain index of overlap elements to send/receive + integer, pointer :: ovrlap_elem(:)=>null() + ! contain for each local overlap element, the number of times + ! that is duplicated + integer, pointer :: ovrlap_index(:)=>null() + ! contain for each local element the corresponding global index + integer, pointer :: loc_to_glob(:)=>null() + ! contain for each global element the corresponding local index, + ! if exist. + integer, pointer :: glob_to_loc (:)=>null() + ! local renumbering induced by sparse matrix storage. + integer, pointer :: lprm(:)=>null() + ! index space in case it is not just the contiguous range 1:n + integer, pointer :: idx_space(:)=>null() + end type psb_desc_type + +contains + + subroutine psb_nullify_desc(desc) + type(psb_desc_type), intent(inout) :: desc + + nullify(desc%matrix_data,desc%loc_to_glob,desc%glob_to_loc,& + &desc%halo_index,desc%bnd_elem,desc%ovrlap_elem,& + &desc%ovrlap_index, desc%lprm, desc%idx_space) + + end subroutine psb_nullify_desc + + logical function psb_is_ok_dec(dectype) + integer :: dectype + + psb_is_ok_dec = ((dectype == desc_asb).or.(dectype == desc_bld).or.& + &(dectype == desc_upd).or.(dectype== desc_upd_asb)) + + end function psb_is_ok_dec + + logical function psb_is_bld_dec(dectype) + integer :: dectype + + psb_is_bld_dec = (dectype == desc_bld) + end function psb_is_bld_dec + + logical function psb_is_upd_dec(dectype) + integer :: dectype + + psb_is_upd_dec = (dectype == desc_upd) + + end function psb_is_upd_dec + + logical function psb_is_asb_upd_dec(dectype) + integer :: dectype + + psb_is_asb_upd_dec = (dectype == desc_upd_asb) + + end function psb_is_asb_upd_dec + + logical function psb_is_asb_dec(dectype) + integer :: dectype + + psb_is_asb_dec = (dectype == desc_asb) + + end function psb_is_asb_dec + +end module psb_descriptor_type diff --git a/src/modules/psb_error_mod.f90 b/src/modules/psb_error_mod.f90 new file mode 100644 index 00000000..ef92322f --- /dev/null +++ b/src/modules/psb_error_mod.f90 @@ -0,0 +1,428 @@ +module psb_error_mod + + public psb_errpush, psb_error, psb_get_errstatus,& + & psb_get_errverbosity, psb_set_errverbosity,psb_errcomm, & + & psb_erractionsave, psb_erractionrestore, act_ret, act_abort, & + & no_err, psb_get_erraction, psb_set_erraction + + interface psb_error + module procedure psb_serror + module procedure psb_perror + end interface + + integer, parameter :: act_ret=0, act_abort=1, no_err=0 + + private + + type psb_errstack_node + + integer :: err_code=0 ! the error code + character(len=20) :: routine='' ! the name of the routine generating the error + integer,dimension(5) :: i_err_data=0 ! array of integer data to complete the error msg + ! real(kind(1.d0))(dim=10) :: r_err_data=0.d0 ! array of real data to complete the error msg + ! complex(dim=10) :: c_err_data=0.c0 ! array of complex data to complete the error msg + character(len=20) :: a_err_data='' ! array of character data to complete the error msg + type(psb_errstack_node), pointer :: next ! pointer to the next element in the stack + + end type psb_errstack_node + + + type psb_errstack + + type(psb_errstack_node), pointer :: top => null() ! pointer to the top element of the stack + integer :: n_elems=0 ! number of entries in the stack + + end type psb_errstack + + + type(psb_errstack) :: error_stack ! the PSBLAS-2.0 error stack + integer :: error_status=0 ! the error status (maybe not here) + integer :: verbosity_level=1 ! the verbosity level (maybe not here) + integer :: err_action=1 + +contains + + + ! saves action to support error traceback + ! also changes error action to "return" + subroutine psb_erractionsave(err_act) + integer, intent(out) :: err_act + err_act=err_action + err_action=act_ret + end subroutine psb_erractionsave + + + ! return the action to take upon error occurrence + subroutine psb_get_erraction(err_act) + integer, intent(out) :: err_act + err_act=err_action + end subroutine psb_get_erraction + + ! sets the action to take upon error occurrence + subroutine psb_set_erraction(err_act) + integer, intent(in) :: err_act + err_action=err_act + end subroutine psb_set_erraction + + + ! restores error action previously saved with psb_erractionsave + subroutine psb_erractionrestore(err_act) + integer, intent(in) :: err_act + err_action=err_act + end subroutine psb_erractionrestore + + + ! checks wether an error has occurred on one of the porecesses in the execution pool + subroutine psb_errcomm(icontxt, err) + integer, intent(in) :: icontxt + integer, intent(inout):: err + integer :: temp(2) + integer, parameter :: ione=1 + + call igamx2d(icontxt, 'A', ' ', ione, ione, err, ione,& + &temp ,temp,-ione ,-ione,-ione) + end subroutine psb_errcomm + + + + ! sets verbosity of the error message + subroutine psb_set_errverbosity(v) + integer, intent(in) :: v + verbosity_level=v + end subroutine psb_set_errverbosity + + + + ! returns verbosity of the error message + subroutine psb_get_errverbosity(v) + integer, intent(out) :: v + v=verbosity_level + end subroutine psb_get_errverbosity + + + + ! checks the status of the error condition + subroutine psb_get_errstatus(s) + integer, intent(out) :: s + s=error_status + end subroutine psb_get_errstatus + + + + ! pushes an error on the error stack + subroutine psb_errpush(err_c, r_name, i_err, a_err) + + integer, intent(in) :: err_c + character(len=20), intent(in) :: r_name + character(len=20), optional :: a_err + integer, optional :: i_err(5) + + type(psb_errstack_node), pointer :: new_node + + + allocate(new_node) + + new_node%err_code = err_c + new_node%routine = r_name + if(present(i_err)) new_node%i_err_data = i_err + if(present(a_err)) new_node%a_err_data = a_err + new_node%next => error_stack%top + error_stack%top => new_node + error_stack%n_elems = error_stack%n_elems+1 + if(error_status.eq.0) error_status=1 + nullify(new_node) + + end subroutine psb_errpush + + + ! pops an error from the error stack + subroutine psb_errpop(err_c, r_name, i_e_d, a_e_d) + + integer, intent(out) :: err_c + character(len=20), intent(out) :: r_name, a_e_d + integer, intent(out) :: i_e_d(5) + + type(psb_errstack_node), pointer :: old_node + + err_c = error_stack%top%err_code + r_name = error_stack%top%routine + i_e_d = error_stack%top%i_err_data + a_e_d = error_stack%top%a_err_data + + old_node => error_stack%top + error_stack%top => old_node%next + error_stack%n_elems = error_stack%n_elems - 1 + if(error_stack%n_elems.eq.0) error_status=0 + + deallocate(old_node) + + end subroutine psb_errpop + + + + ! handles the occurence of an error in a parallel routine + subroutine psb_perror(icontxt) + + integer, intent(in) :: icontxt + integer :: err_c + character(len=20) :: r_name, a_e_d + integer :: i_e_d(5) + integer :: nprow, npcol, me, mypcol, temp(2) + integer, parameter :: ione=1, izero=0 + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + + if(error_status.gt.0) then + if(verbosity_level.gt.1) then + + do while (error_stack%n_elems.gt.izero) + write(0,'(50("="))') + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + call psb_errmsg(err_c, r_name, i_e_d, a_e_d,me) + ! write(0,'(50("="))') + end do + call blacs_abort(icontxt,-1) + else + + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + call psb_errmsg(err_c, r_name, i_e_d, a_e_d,me) + do while (error_stack%n_elems.gt.0) + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + end do + call blacs_abort(icontxt,-1) + end if + end if + + if(error_status.gt.izero) then + call blacs_abort(icontxt,err_c) + end if + + + end subroutine psb_perror + + + ! handles the occurence of an error in a serial routine + subroutine psb_serror() + + integer :: err_c + character(len=20) :: r_name, a_e_d + integer :: i_e_d(5) + integer :: nprow, npcol, me, mypcol, temp(2) + integer, parameter :: ione=1, izero=0 + + if(error_status.gt.0) then + if(verbosity_level.gt.1) then + + do while (error_stack%n_elems.gt.izero) + write(0,'(50("="))') + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + call psb_errmsg(err_c, r_name, i_e_d, a_e_d) + ! write(0,'(50("="))') + end do + + else + + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + call psb_errmsg(err_c, r_name, i_e_d, a_e_d) + do while (error_stack%n_elems.gt.0) + call psb_errpop(err_c, r_name, i_e_d, a_e_d) + end do + end if + end if + + end subroutine psb_serror + + + ! prints the error msg associated to a specific error code + subroutine psb_errmsg(err_c, r_name, i_e_d, a_e_d,me) + + integer, intent(in) :: err_c + character(len=20), intent(in) :: r_name, a_e_d + integer, intent(in) :: i_e_d(5) + integer, optional :: me + + if(present(me)) then + write(0,'("Process: ",i0,". PSBLAS Error (",i0,") in subroutine: ",a20)')me,err_c,r_name + else + write(0,'("PSBLAS Error (",i0,") in subroutine: ",a20)')err_c,r_name + end if + + + select case (err_c) + case(:0) + write (0,'("error on calling sperror. err_c must be greater than 0")') + case(2) + write (0,'("pivot too small")') + case(3) + write (0,'("Invalid number of ovr:",i0)')i_e_d(1) + case(5) + write (0,'("Invalid input")') + + case(10) + write (0,'("input argument n. ",i0," cannot be less than 0")')i_e_d(1) + write (0,'("current value is ",i0)')i_e_d(2) + + case(20) + write (0,'("input argument n. ",i0," cannot be greater than 0")')i_e_d(1) + write (0,'("current value is ",i0)')i_e_d(2) + case(30) + write (0,'("input argument n. ",i0," has an invalid value")')i_e_d(1) + write (0,'("current value is ",i0)')i_e_d(2) + case(35) + write (0,'("Size of input array argument n. ",i0," is invalid.")')i_e_d(1) + write (0,'("Current value is ",i0)')i_e_d(2) + case(40) + write (0,'("input argument n. ",i0," has an invalid value")')i_e_d(1) + write (0,'("current value is ",a)')a_e_d(2:2) + case(50) + write (0,'("input argument n. ",i0," must be equal or greater than input argument n. ",i0)') i_e_d(1), i_e_d(2) + write (0,'("current values are ",i0," < ",i0)') i_e_d(3),i_e_d(4) + case(60) + write (0,'("input argument n. ",i0," must be equal or greater than ",i0)')i_e_d(1),i_e_d(2) + write (0,'("current value is ",i0," < ",i0)')i_e_d(3), i_e_d(2) + case(70) + write (0,'("input argument n. ",i0," in entry # ",i0," has an invalid value")')i_e_d(1:2) + write (0,'("current value is ",a)')a_e_d + case(71) + write (0,'("Impossible error in ASB: nrow>ncol,")') + write (0,'("Actual values are ",i0," > ",i0)')i_e_d(1:2) + ! ... csr format error ... + case(80) + write (0,'("input argument ia2(1) is less than 0")') + write (0,'("current value is ",i0)')i_e_d(1) + ! ... csr format error ... + case(90) + write (0,'("indices in ia2 array are not in increasing order")') + case(91) + write (0,'("indices in ia1 array are not in increasing order")') + ! ... csr format error ... + case(100) + write (0,'("indices in ia1 array are not within problem dimension")') + write (0,'("problem dimension is ",i0)')i_e_d(1) + case(110) + write (0,'("invalid combination of input arguments")') + case(115) + write (0,'("Invalid process identifier in input array argument n. ",i0,".")')i_e_d(1) + write (0,'("Current value is ",i0)')i_e_d(2) + case(120) + write (0,'("input argument n. ",i0," must be greater than input argument n. ",i0)')i_e_d(1:2) + write (0,'("current values are ",i0," < ",i0)') i_e_d(3:4) + ! ... coo format error ... + case(130) + write (0,'("there are duplicated elements in coo format")') + write (0,'("please set repflag flag to 2 or 3")') + case(134) + write (0,'("Invalid input format ",a3)')a_e_d(1:3) + case(135) + write (0,'("Format ",a3," not yet supported here")')a_e_d(1:3) + case(136) + write (0,'("Format ",a3," is unknown")')a_e_d(1:3) + case(140) + write (0,'("indices in input array are not within problem dimension ",2(i0,2x))')i_e_d(1:2) + case(150) + write (0,'("indices in input array are not belonging to the calling process ",i0)')i_e_d(1) + case(290) + write (0,'("Is not possible to call this routine without calling before psdalloc on the same matrix")') + case(295) + write (0,'("Is not possible to call this routine without calling before psdspalloc on the same matrix")') + case(300) + write (0,'("Input argument n. ",i0," must be equal to entry n. ",i0," in array input argument n.",i0)') & + & i_e_d(1),i_e_d(4),i_e_d(3) + write (0,'("Current values are ",i0," != ",i0)')i_e_d(2), i_e_d(5) + case(400) + write (0,'("MPI error:",i0)')i_e_d(1) + case(550) + write (0,'("Parameter n. ",i0," must be equal on all BLACS processes. ",i0)')i_e_d(1) + case(570) + write (0,'("partition function passed as input argument n. ",i0," returns number of processes")')i_e_d(1) + write (0,'("greater than No of grid s processes on global point ",i0,". Actual number of grid s ")')i_e_d(4) + write (0,'("processes is ",i0,", number returned is ",i0)')i_e_d(2),i_e_d(3) + case(575) + write (0,'("partition function passed as input argument n. ",i0," returns number of processes")')i_e_d(1) + write (0,'("less or equal to 0 on global point ",i0,". Number returned is ",i0)')i_e_d(3),i_e_d(2) + case(580) + write (0,'("partition function passed as input argument n. ",i0," returns wrong processes identifier")')i_e_d(1) + write (0,'("on global point ",i0,". Current value returned is : ",i0)')i_e_d(3),i_e_d(2) + case(600) + write (0,'("Sparse Matrix and decsriptors are in an invalid state for this subroutine call: ",i0)')i_e_d(1) + case (1122) + write (0,'("Invalid state for DESC_A")') + case (1123) + write (0,'("Invalid combined state for A and DESC_A")') + case(1124:1999) + write (0,'("computational error. code: ",i0)')err_c + case(2010) + write (0,'("BLACS error. Number of processes=-1")') + case(2025) + write (0,'("Cannot allocate ",i0," bytes")')i_e_d(1) + case(2030) + write (0,'("BLACS ERROR: Number of grid columns must be equal to 1\nCurrent value is ",i4," != 1.")')i_e_d(1) + case(2231) + write (0,'("Invalid input state for matrix.")') + case(2232) + write (0,'("Input state for matrix is not adequate for regeneration.")') + case (2233:2999) + write(0,'("resource error. code: ",i0)')err_c + case(3000:3009) + write (0,'("sparse matrix representation ",a3," not yet implemented")')a_e_d(1:3) + case(3010) + write (0,'("Case lld not equal matrix_data[N_COL_] is not yet implemented.")') + case(3015) + write (0,'("transpose option for sparse matrix representation ",a3," not implemented")')a_e_d(1:3) + case(3020) + write (0,'("Case trans = C is not yet implemented.")') + case(3021) + write (0,'("Case trans /= N is not yet implemented.")') + case(3022) + write (0,'("Only unit diagonal so far for triangular matrices. ")') + case(3023) + write (0,'("Cases DESCRA(1:1)=S DESCRA(1:1)=T not yet implemented. ")') + case(3024) + write (0,'("Cases DESCRA(1:1)=G not yet implemented. ")') + case(3030) + write (0,'("Case ja/=ix or ia/=iy is not yet implemented.")') + case(3040) + write (0,'("Case ix /= 1 or iy /= 1 is not yet implemented.")') + case(3050) + write (0,'("Case ix /= iy is not yet implemented.")') + case(3060) + write (0,'("Case ix /= 1 is not yet implemented.")') + case(3070) + write (0,'("This operation is only implemented with no overlap.")') + case(3080) + write (0,'("Decompostion type ",i0," not yet supported.")')i_e_d(1) + case(3090) + write (0,'("Insert matrix mode not yet implemented.")') + case(3100) + write (0,'("Error on index. Element has not been inserted")') + write (0,'("local index is: ",i0," and global index is:",i0)')i_e_d(1:2) + case(3110) + write (0,'("Before you call this routine, you must assembly sparse matrix")') + case(3111:3999) + write(0,'("miscellaneus error. code: ",i0)')err_c + case(4000) + write(0,'("Allocation/deallocation error")') + case(4010) + write (0,'("Error from call to subroutine ",a)')a_e_d + case(4011) + write (0,'("Error from call to a subroutine ")') + case(4012) + write (0,'("Error ",i0," from call to a subroutine ")')i_e_d(1) + case (5001) + write (0,'("Invalid ISTOP: ",i0)')i_e_d(1) + case (5002) + write (0,'("Invalid PREC: ",i0)')i_e_d(1) + case (5003) + write (0,'("Invalid PREC: ",a3)')a_e_d(1:3) + case default + write(0,'("unknown error (",i0,") in subroutine ",a)')err_c,r_name + write(0,'(5(i0,2x))') i_e_d + write(0,'(a)') a_e_d + + end select + + end subroutine psb_errmsg + + + +end module psb_error_mod diff --git a/src/modules/psb_methd_mod.f90 b/src/modules/psb_methd_mod.f90 new file mode 100644 index 00000000..96833a4a --- /dev/null +++ b/src/modules/psb_methd_mod.f90 @@ -0,0 +1,123 @@ +Module psb_methd_mod + + interface psb_cg + subroutine psb_dcg(a,prec,b,x,eps,& + & desc_a,info,itmax,iter,err,itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(in) :: b(:) + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(in) :: eps + type(psb_dprec_type), intent(in) :: prec + integer, intent(out) :: info + integer, optional, intent(in) :: itmax, itrace,istop + integer, optional, intent(out) :: iter + real(kind(1.d0)), optional, intent(out) :: err + end subroutine psb_dcg + end interface + + interface spb_bicg + subroutine psb_dbicg(a,prec,b,x,eps,& + & desc_a,info,itmax,iter,err,itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(in) :: b(:) + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(in) :: eps + type(psb_dprec_type), intent(in) :: prec + integer, intent(out) :: info + integer, optional, intent(in) :: itmax, itrace,istop + integer, optional, intent(out) :: iter + real(kind(1.d0)), optional, intent(out) :: err + end subroutine psb_dbicg + end interface + + interface ppsb_bicgstab + subroutine psb_dcgstab(a,prec,b,x,eps,& + & desc_a,info,itmax,iter,err,itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(in) :: b(:) + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(in) :: eps + type(psb_dprec_type), intent(in) :: prec + integer, intent(out) :: info + integer, optional, intent(in) :: itmax, itrace,istop + integer, optional, intent(out) :: iter + real(kind(1.d0)), optional, intent(out) :: err + end subroutine psb_dcgstab + end interface + + interface psb_bicgstabl + Subroutine psb_dcgstabl(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err, itrace,irst,istop) + use psb_serial_mod + use psb_descriptor_type + Use psb_prec_type +!!$ parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + type(psb_dprec_type), intent(in) :: prec + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err + end subroutine psb_dcgstabl + end interface + + interface psb_rgmres + Subroutine psb_dgmresr(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,irst,istop) + use psb_serial_mod + use psb_descriptor_type + Use psb_prec_type +!!$ parameters + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + type(psb_dprec_type), intent(in) :: prec + Real(Kind(1.d0)), Intent(in) :: b(:) + Real(Kind(1.d0)), Intent(inout) :: x(:) + Real(Kind(1.d0)), Intent(in) :: eps + integer, intent(out) :: info + Integer, Optional, Intent(in) :: itmax, itrace, irst,istop + Integer, Optional, Intent(out) :: iter + Real(Kind(1.d0)), Optional, Intent(out) :: err + end subroutine psb_dgmresr + end interface + + interface psb_cgs + subroutine psb_dcgs(a,prec,b,x,eps,desc_a,info,& + &itmax,iter,err,itrace,istop) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type +!!$ parameters + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dprec_type), intent(in) :: prec + real(kind(1.d0)), intent(in) :: b(:) + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(in) :: eps + integer, intent(out) :: info + integer, optional, intent(in) :: itmax, itrace,istop + integer, optional, intent(out) :: iter + real(kind(1.d0)), optional, intent(out) :: err + end subroutine psb_dcgs + end interface + +end module psb_methd_mod + + + diff --git a/src/modules/psb_prec_mod.f90 b/src/modules/psb_prec_mod.f90 new file mode 100644 index 00000000..59913868 --- /dev/null +++ b/src/modules/psb_prec_mod.f90 @@ -0,0 +1,124 @@ + +module psb_prec_mod + use psb_prec_type + + interface psb_bldaggrmat + subroutine psb_dbldaggrmat(a,desc_a,p,info) + use psb_prec_type + use psb_descriptor_type + use psb_spmat_type + type(psb_dspmat_type), intent(in), target :: a + type(psb_dbase_prec), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dbldaggrmat + end interface + + +interface psb_genaggrmap + subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_spmat_type + use psb_descriptor_type + implicit none + integer, intent(in) :: aggr_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + end subroutine psb_dgenaggrmap +end interface + + interface psb_precbld + subroutine psb_dprecbld(a,prec,desc_a,ierr,upd) + use psb_descriptor_type + use psb_prec_type + implicit none + integer, intent(out) :: ierr + type(psb_dspmat_type), intent(in), target :: a + type(psb_dprec_type), intent(inout) :: prec + type(psb_desc_type), intent(in) :: desc_a + character, intent(in),optional :: upd + end subroutine psb_dprecbld + end interface + + interface psb_precset + subroutine psb_dprecset(prec,ptype,iv,rs,rv,ierr) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + implicit none + type(psb_dprec_type), intent(inout) :: prec + character(len=10), intent(in) :: ptype + integer, optional, intent(in) :: iv(:) + real(kind(1.d0)), optional, intent(in) :: rs + real(kind(1.d0)), optional, intent(in) :: rv(:) + integer, optional, intent(out) :: ierr + end subroutine psb_dprecset + end interface + + + interface psb_precfree + subroutine psb_dprecfree(p,info) + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_prec_type + type(psb_dprec_type), intent(inout) :: p + integer, intent(out) :: info + end subroutine psb_dprecfree + end interface + + interface psb_cslu + subroutine psb_dcslu(a,desc_data,p,upd,info) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + integer, intent(out) :: info + type(psb_dspmat_type), intent(in), target :: a + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbase_prec), intent(inout) :: p + character, intent(in) :: upd + end subroutine psb_dcslu + end interface + + interface psb_csrsetup + Subroutine psb_dcsrsetup(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + use psb_serial_mod + Use psb_descriptor_type + Use psb_prec_type + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + integer, intent(out) :: info + character(len=5), optional :: outfmt + end Subroutine psb_dcsrsetup + end interface + + interface psb_prcaply + subroutine psb_dprecaply(prec,x,y,desc_data,info,trans,work) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + real(kind(0.d0)),intent(inout), optional, target :: work(:) + end subroutine psb_dprecaply + subroutine psb_dprecaply1(prec,x,desc_data,info,trans) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:) + integer, intent(out) :: info + character(len=1), optional :: trans + end subroutine psb_dprecaply1 + end interface + +end module psb_prec_mod diff --git a/src/modules/psb_prec_type.f90 b/src/modules/psb_prec_type.f90 new file mode 100644 index 00000000..cdc98812 --- /dev/null +++ b/src/modules/psb_prec_type.f90 @@ -0,0 +1,373 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!! Module to define PREC_DATA, !! +!! structure for preconditioning. !! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +module psb_prec_type + + use psb_spmat_type + use psb_descriptor_type + + integer, parameter :: min_prec_=0, noprec_=0, diagsc_=1, bja_=2,& + & asm_=3, ras_=5, ash_=4, rash_=6, ras2lv_=7, ras2lvm_=8,& + & lv2mras_=9, lv2smth_=10, lv2lsm_=11, sl2sm_=12, superlu_=13,& + & new_loc_smth_=14, new_glb_smth_=15, max_prec_=15 + ! Multilevel stuff. + integer, parameter :: no_ml_=0, add_ml_prec_=1, mult_ml_prec_=2 + integer, parameter :: new_ml_prec_=3, max_ml_=new_ml_prec_ + integer, parameter :: pre_smooth_=1, post_smooth_=2, smooth_both_=3,& + & max_smooth_=smooth_both_ + integer, parameter :: loc_aggr_=0, glb_aggr_=1, new_loc_aggr_=2 + integer, parameter :: new_glb_aggr_=3, max_aggr_=new_glb_aggr_ + integer, parameter :: no_smth_=0, smth_omg_=1, smth_biz_=2 + integer, parameter :: lib_choice_=0, user_choice_=1 + integer, parameter :: mat_distr_=0, mat_repl_=1 + ! Entries in iprcparm: preconditioner type, factorization type, + ! prolongation type, restriction type, renumbering algorithm, + ! number of overlap layers, pointer to SuperLU factors, + ! levels of fill in for ILU(N), + integer, parameter :: p_type_=1, f_type_=2, restr_=3, prol_=4 + integer, parameter :: iren_=5, n_ovr_=6, slu_ptr_=7 + integer, parameter :: ilu_fill_in_=8, jac_sweeps_=9, ml_type_=10 + integer, parameter :: smth_pos_=11, aggr_alg_=12, smth_kind_=13 + integer, parameter :: om_choice_=14, glb_smth_=15, coarse_mat_=16 + integer, parameter :: ifpsz=20 + ! Entries in dprcparm: ILU(E) epsilon, smoother omega + integer, parameter :: fact_eps_=1, smooth_omega_=2 + integer, parameter :: dfpsz=4 + ! Factorization types: none, ILU(N), ILU(E), SuperLU + integer, parameter :: f_none_=0,f_ilu_n_=1, f_ilu_e_=2,f_slu_=3 + ! Fields for sparse matrices ensembles: + integer, parameter :: l_pr_=1, u_pr_=2, bp_ilu_avsz=2 + integer, parameter :: ap_nd_=3, ac_=4, sm_pr_t_=5, sm_pr_=6 + integer, parameter :: smth_avsz=6 + + + type psb_dbase_prec + + type(psb_dspmat_type), pointer :: av(:) => null() + real(kind(1.d0)), pointer :: d(:) => null() + type(psb_desc_type), pointer :: desc_data => null() + integer, pointer :: iprcparm(:) => null() + real(kind(1.d0)), pointer :: dprcparm(:) => null() + integer, pointer :: perm(:) => null(), invperm(:) => null() + integer, pointer :: mlia(:) => null(), nlaggr(:) => null() + type(psb_dspmat_type), pointer :: aorig => null() + real(kind(1.d0)), pointer :: dorig(:) => null() + + end type psb_dbase_prec + + type psb_dprec_type + type(psb_dbase_prec), pointer :: baseprecv(:) => null() + ! contain type of preconditioning to be performed + integer :: prec, base_prec + end type psb_dprec_type + + + character(len=15), parameter, private :: & + & smooth_names(1:2)=(/'Pre-smoothing ','Post-smoothing'/) + character(len=15), parameter, private :: & + & smooth_kinds(0:2)=(/'No smoother ','Omega smoother',& + & 'Bizr. smoother'/) + character(len=15), parameter, private :: & + & matrix_names(0:1)=(/'Distributed ','Replicated '/) + character(len=18), parameter, private :: & + & aggr_names(0:3)=(/'Local aggregation ','Global aggregation',& + & 'New local aggr. ','New global aggr. '/) + character(len=6), parameter, private :: & + & restrict_names(0:4)=(/'None ',' ',' ',' ','Halo '/) + character(len=12), parameter, private :: & + & prolong_names(0:3)=(/'None ','Sum ','Average ','Square root'/) + character(len=15), parameter, private :: & + & ml_names(0:3)=(/'None ','Additive ','Multiplicative',& + & 'New ML '/) + character(len=15), parameter, private :: & + & fact_names(0:3)=(/'None ','ILU(n) ',& + & 'ILU(eps) ','Sparse LU '/) + + interface psb_base_precfree + module procedure psb_dbase_precfree + end interface + interface psb_check_def + module procedure psb_icheck_def, psb_dcheck_def + end interface + + interface psb_prec_descr + module procedure psb_file_prec_descr + end interface + +contains + + subroutine psb_file_prec_descr(iout,p) + integer, intent(in) :: iout + type(psb_dprec_type), intent(in) :: p + + write(iout,*) 'Preconditioner description' + if (associated(p%baseprecv)) then + if (size(p%baseprecv)>=1) then + write(iout,*) 'Base preconditioner' + select case(p%baseprecv(1)%iprcparm(p_type_)) + case(noprec_) + write(iout,*) 'No preconditioning' + case(diagsc_) + write(iout,*) 'Diagonal scaling' + case(bja_) + write(iout,*) 'Block Jacobi with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + case(asm_,ras_,ash_,rash_) + write(iout,*) 'Additive Schwarz with: ',& + & fact_names(p%baseprecv(1)%iprcparm(f_type_)) + write(iout,*) 'Overlap:',& + & p%baseprecv(1)%iprcparm(n_ovr_) + write(iout,*) 'Restriction: ',& + & restrict_names(p%baseprecv(1)%iprcparm(restr_)) + write(iout,*) 'Prolongation: ',& + & prolong_names(p%baseprecv(1)%iprcparm(prol_)) + end select + end if + if (size(p%baseprecv)>=2) then + if (.not.associated(p%baseprecv(2)%iprcparm)) then + write(iout,*) 'Inconsistent MLPREC part!' + return + endif + write(iout,*) 'Multilevel: ',ml_names(p%baseprecv(2)%iprcparm(ml_type_)) + if (p%baseprecv(2)%iprcparm(ml_type_)>no_ml_) then + write(iout,*) 'Multilevel aggregation: ', & + & aggr_names(p%baseprecv(2)%iprcparm(aggr_alg_)) + write(iout,*) 'Smoother: ', & + & smooth_kinds(p%baseprecv(2)%iprcparm(smth_kind_)) + write(iout,*) 'Smoothing omega: ', p%baseprecv(2)%dprcparm(smooth_omega_) + write(iout,*) 'Smoothing position: ',& + & smooth_names(p%baseprecv(2)%iprcparm(smth_pos_)) + write(iout,*) 'Coarse matrix: ',& + & matrix_names(p%baseprecv(2)%iprcparm(coarse_mat_)) + write(iout,*) 'Factorization type: ',& + & fact_names(p%baseprecv(2)%iprcparm(f_type_)) + select case(p%baseprecv(2)%iprcparm(f_type_)) + case(f_ilu_n_) + write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(ilu_fill_in_) + case(f_ilu_e_) + write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(fact_eps_) + case(f_slu_) + case default + write(iout,*) 'Should never get here!' + end select + write(iout,*) 'Number of Jacobi sweeps: ', & + & (p%baseprecv(2)%iprcparm(jac_sweeps_)) + + end if + end if + + else + write(iout,*) 'No Base preconditioner available, something is wrong!' + return + endif + + end subroutine psb_file_prec_descr + + function is_legal_base_prec(ip) + integer, intent(in) :: ip + logical :: is_legal_base_prec + + is_legal_base_prec = ((ip>=noprec_).and.(ip<=rash_)) + return + end function is_legal_base_prec + function is_legal_n_ovr(ip) + integer, intent(in) :: ip + logical :: is_legal_n_ovr + + is_legal_n_ovr = (ip >=0) + return + end function is_legal_n_ovr + function is_legal_jac_sweeps(ip) + integer, intent(in) :: ip + logical :: is_legal_jac_sweeps + + is_legal_jac_sweeps = (ip >= 1) + return + end function is_legal_jac_sweeps + function is_legal_prolong(ip) + integer, intent(in) :: ip + logical :: is_legal_prolong + + is_legal_prolong = ((ip>=none_).and.(ip<=square_root_)) + return + end function is_legal_prolong + function is_legal_restrict(ip) + integer, intent(in) :: ip + logical :: is_legal_restrict + + is_legal_restrict = ((ip==nohalo_).or.(ip==halo_)) + return + end function is_legal_restrict + function is_legal_ml_type(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_type + + is_legal_ml_type = ((ip>=no_ml_).and.(ip<=max_ml_)) + return + end function is_legal_ml_type + function is_legal_ml_aggr_kind(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_aggr_kind + + is_legal_ml_aggr_kind = ((ip>=loc_aggr_).and.(ip<=max_aggr_)) + return + end function is_legal_ml_aggr_kind + function is_legal_ml_smooth_pos(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_smooth_pos + + is_legal_ml_smooth_pos = ((ip>=pre_smooth_).and.(ip<=max_smooth_)) + return + end function is_legal_ml_smooth_pos + function is_legal_ml_smth_kind(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_smth_kind + + is_legal_ml_smth_kind = ((ip>=no_smth_).and.(ip<=smth_biz_)) + return + end function is_legal_ml_smth_kind + function is_legal_ml_coarse_mat(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_coarse_mat + + is_legal_ml_coarse_mat = ((ip>=mat_distr_).and.(ip<=mat_repl_)) + return + end function is_legal_ml_coarse_mat + function is_legal_ml_fact(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_fact + + is_legal_ml_fact = ((ip>=f_ilu_n_).and.(ip<=f_slu_)) + return + end function is_legal_ml_fact + function is_legal_ml_lev(ip) + integer, intent(in) :: ip + logical :: is_legal_ml_lev + + is_legal_ml_lev = (ip>=0) + return + end function is_legal_ml_lev + function is_legal_omega(ip) + real(kind(1.d0)), intent(in) :: ip + logical :: is_legal_omega + + is_legal_omega = ((ip>=0.0d0).and.(ip<=2.0d0)) + return + end function is_legal_omega + function is_legal_ml_eps(ip) + real(kind(1.d0)), intent(in) :: ip + logical :: is_legal_ml_eps + + is_legal_ml_eps = (ip>=0.0d0) + return + end function is_legal_ml_eps + + + subroutine psb_icheck_def(ip,name,id,is_legal) + integer, intent(inout) :: ip + integer, intent(in) :: id + character(len=*), intent(in) :: name + interface + function is_legal(i) + integer, intent(in) :: i + logical :: is_legal + end function is_legal + end interface + + if (.not.is_legal(ip)) then + write(0,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id + ip = id + end if + end subroutine psb_icheck_def + + subroutine psb_dcheck_def(ip,name,id,is_legal) + real(kind(1.d0)), intent(inout) :: ip + real(kind(1.d0)), intent(in) :: id + character(len=*), intent(in) :: name + interface + function is_legal(i) + real(kind(1.d0)), intent(in) :: i + logical :: is_legal + end function is_legal + end interface + + if (.not.is_legal(ip)) then + write(0,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id + ip = id + end if + end subroutine psb_dcheck_def + + subroutine psb_dbase_precfree(p,info) + use psb_serial_mod + use psb_descriptor_type + use psb_tools_mod + type(psb_dbase_prec), intent(inout) :: p + integer, intent(out) :: info + integer :: i + + info = 0 + + if (associated(p%d)) then + deallocate(p%d,stat=info) + end if + + if (associated(p%av)) then + do i=1,size(p%av) + call psb_spfree(p%av(i),info) + if (info /= 0) then + ! Actually, we don't care here about this. + ! Just let it go. + ! return + end if + enddo + deallocate(p%av,stat=info) + p%av => null() + end if + if (associated(p%desc_data)) then + if (associated(p%desc_data%matrix_data)) then + call psb_dscfree(p%desc_data,info) + end if + deallocate(p%desc_data) + endif + if (associated(p%dprcparm)) then + deallocate(p%dprcparm,stat=info) + end if + if (associated(p%aorig)) then + ! This is a pointer to something else, must not free it here. + nullify(p%aorig) + endif + if (associated(p%dorig)) then + deallocate(p%dorig,stat=info) + nullify(p%dorig) + endif + + if (associated(p%mlia)) then + deallocate(p%mlia,stat=info) + endif + + if (associated(p%nlaggr)) then + deallocate(p%nlaggr,stat=info) + endif + + if (associated(p%iprcparm)) then + if (p%iprcparm(f_type_)==f_slu_) then + call fort_slu_free(p%iprcparm(slu_ptr_),info) + end if + deallocate(p%iprcparm,stat=info) + end if + call psb_nullify_baseprec(p) + end subroutine psb_dbase_precfree + + subroutine psb_nullify_baseprec(p) + use psb_descriptor_type + type(psb_dbase_prec), intent(inout) :: p + + nullify(p%av,p%d,p%iprcparm,p%dprcparm,p%perm,p%invperm,p%mlia,& + & p%nlaggr,p%aorig,p%dorig,p%desc_data) + + end subroutine psb_nullify_baseprec + + +end module psb_prec_type diff --git a/src/modules/psb_psblas_mod.f90 b/src/modules/psb_psblas_mod.f90 new file mode 100644 index 00000000..16debb85 --- /dev/null +++ b/src/modules/psb_psblas_mod.f90 @@ -0,0 +1,238 @@ +module psb_psblas_mod + use psb_comm_mod + + interface psb_dot + function psb_ddotv(x, y, desc_a,info) + use psb_descriptor_type + real(kind(1.d0)) :: psb_ddotv + real(kind(1.d0)), intent(in) :: x(:), y(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end function psb_ddotv + function psb_ddot(x, y, desc_a, info, jx, jy) + use psb_descriptor_type + real(kind(1.d0)) :: psb_ddot + real(kind(1.d0)), intent(in) :: x(:,:), y(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, optional, intent(in) :: jx, jy + integer, intent(out) :: info + end function psb_ddot + end interface + + interface psb_dots + subroutine psb_ddotvs(res,x, y, desc_a, info) + use psb_descriptor_type + real(kind(1.d0)), intent(out) :: res + real(kind(1.d0)), intent(in) :: x(:), y(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_ddotvs + subroutine psb_dmdots(res,x, y, desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent(out) :: res(:) + real(kind(1.d0)), intent(in) :: x(:,:), y(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dmdots + end interface + + interface psb_axpby + subroutine psb_daxpbyv(alpha, x, beta, y,& + & desc_a, info) + use psb_descriptor_type + real(kind(1.d0)), intent (in) :: x(:) + real(kind(1.d0)), intent (inout) :: y(:) + real(kind(1.d0)), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_daxpbyv + subroutine psb_daxpby(alpha, x, beta, y,& + & desc_a, info, n, jx, jy) + use psb_descriptor_type + real(kind(1.d0)), intent (in) :: x(:,:) + real(kind(1.d0)), intent (inout) :: y(:,:) + real(kind(1.d0)), intent (in) :: alpha, beta + type(psb_desc_type), intent (in) :: desc_a + integer, optional :: n, jx, jy + integer, intent(out) :: info + end subroutine psb_daxpby + end interface + + interface psb_amax + function psb_damax(x, desc_a, info, jx) + use psb_descriptor_type + real(kind(1.d0)) psb_damax + real(kind(1.d0)), intent (in) :: x(:,:) + type(psb_desc_type), intent (in) :: desc_a + integer, optional, intent (in) :: jx + integer, intent(out) :: info + end function psb_damax + function psb_damaxv(x, desc_a,info) + use psb_descriptor_type + real(kind(1.d0)) psb_damaxv + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end function psb_damaxv + end interface + + interface psb_amaxs + subroutine psb_damaxvs(res,x,desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent (out) :: res + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_damaxvs + subroutine psb_dmamax(res,x,desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent (out) :: res(:) + real(kind(1.d0)), intent (in) :: x(:,:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dmamax + end interface + + interface psb_asum + function psb_dasum(x, desc_a, info, jx) + use psb_descriptor_type + real(kind(1.d0)) psb_dasum + real(kind(1.d0)), intent (in) :: x(:,:) + type(psb_desc_type), intent (in) :: desc_a + integer, optional, intent (in) :: jx + integer, intent(out) :: info + end function psb_dasum + function psb_dasumv(x, desc_a, info) + use psb_descriptor_type + real(kind(1.d0)) psb_dasumv + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end function psb_dasumv + end interface + + interface psb_asums + subroutine psb_dasumvs(res,x,desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent (out) :: res + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dasumvs + subroutine psb_dmasum(res,x,desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent (out) :: res(:) + real(kind(1.d0)), intent (in) :: x(:,:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dmasum + end interface + + + interface psb_nrm2 + function psb_dnrm2(x, desc_a, info, jx) + use psb_descriptor_type + real(kind(1.d0)) psb_dnrm2 + real(kind(1.d0)), intent (in) :: x(:,:) + type(psb_desc_type), intent (in) :: desc_a + integer, optional, intent (in) :: jx + integer, intent(out) :: info + end function psb_dnrm2 + function psb_dnrm2v(x, desc_a, info) + use psb_descriptor_type + real(kind(1.d0)) psb_dnrm2v + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end function psb_dnrm2v + end interface + + interface psb_nrm2s + subroutine psb_dnrm2vs(res,x,desc_a,info) + use psb_descriptor_type + real(kind(1.d0)), intent (out) :: res + real(kind(1.d0)), intent (in) :: x(:) + type(psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dnrm2vs + end interface + + + interface psb_nrmi + function psb_dnrmi(a, desc_a,info) + use psb_serial_mod + use psb_descriptor_type + real(kind(1.d0)) :: psb_dnrmi + type (psb_dspmat_type), intent (in) :: a + type (psb_desc_type), intent (in) :: desc_a + integer, intent(out) :: info + end function psb_dnrmi + end interface + + interface psb_spmm + subroutine psb_dspmm(alpha, a, x, beta, y, desc_a, info,& + &trans, k, jx, jy,work,doswap) + use psb_serial_mod + use psb_descriptor_type + type (psb_dspmat_type), intent(in) :: a + real(kind(1.d0)), intent(inout) :: x(:,:) + real(kind(1.d0)), intent(inout) :: y(:,:) + real(kind(1.d0)), intent(in) :: alpha, beta + type(psb_desc_type), intent(in) :: desc_a + character, optional, intent(in) :: trans + real(kind(1.d0)), optional, intent(inout) :: work(:) + integer, optional, intent(in) :: k, jx, jy,doswap + integer, intent(out) :: info + end subroutine psb_dspmm + subroutine psb_dspmv(alpha, a, x, beta, y,& + & desc_a, info, trans, work,doswap) + use psb_serial_mod + use psb_descriptor_type + type (psb_dspmat_type), intent(in) :: a + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(inout) :: y(:) + real(kind(1.d0)), intent(in) :: alpha, beta + type(psb_desc_type), intent(in) :: desc_a + character, optional, intent(in) :: trans + real(kind(1.d0)), optional, intent(inout) :: work(:) + integer, optional, intent(in) :: doswap + integer, intent(out) :: info + end subroutine psb_dspmv + end interface + + interface psb_spsm + subroutine psb_dspsm(alpha, t, x, beta, y,& + & desc_a, info, trans, unit, choice,& + & diag, n, jx, jy, work) + use psb_serial_mod + use psb_descriptor_type + type (psb_dspmat_type), intent(in) :: t + real(kind(1.d0)), intent(in) :: x(:,:) + real(kind(1.d0)), intent(inout) :: y(:,:) + real(kind(1.d0)), intent(in) :: alpha, beta + type(psb_desc_type), intent(in) :: desc_a + character, optional, intent(in) :: trans, unit + integer, optional, intent(in) :: n, jx, jy + integer, optional, intent(in) :: choice + real(kind(1.d0)), optional, intent(in) :: work(:), diag(:) + integer, intent(out) :: info + end subroutine psb_dspsm + subroutine psb_dspsv(alpha, t, x, beta, y,& + & desc_a, info, trans, unit, choice,& + & diag, work) + use psb_serial_mod + use psb_descriptor_type + type (psb_dspmat_type), intent(in) :: t + real(kind(1.d0)), intent(in) :: x(:) + real(kind(1.d0)), intent(inout) :: y(:) + real(kind(1.d0)), intent(in) :: alpha, beta + type(psb_desc_type), intent(in) :: desc_a + character, optional, intent(in) :: trans, unit + integer, optional, intent(in) :: choice + real(kind(1.d0)), optional, intent(in) :: work(:), diag(:) + integer, intent(out) :: info + end subroutine psb_dspsv + end interface + + +end module psb_psblas_mod diff --git a/src/modules/psb_realloc_mod.f90 b/src/modules/psb_realloc_mod.f90 new file mode 100644 index 00000000..2d08b7ac --- /dev/null +++ b/src/modules/psb_realloc_mod.f90 @@ -0,0 +1,398 @@ +module psb_realloc_mod + implicit none + + Interface psb_realloc + module procedure psb_dreallocate1i + module procedure psb_dreallocate2i + module procedure psb_dreallocate2i1d + module procedure psb_dreallocate1d + module procedure psb_dreallocated2 + end Interface + + Interface psb_realloc1it + module procedure psb_dreallocate1it + end Interface + +Contains + + Subroutine psb_dreallocate1i(len,rrax,info,pad) + use psb_error_mod + + ! ...Subroutine Arguments + Integer,Intent(in) :: len + Integer,pointer :: rrax(:) + integer :: info + integer, optional, intent(in) :: pad + ! ...Local Variables + Integer,Pointer :: tmp(:) + Integer :: dim, err_act, err,i + character(len=20) :: name + + name='psb_dreallocate1i' + call psb_erractionsave(err_act) + + info=0 + if (associated(rrax)) then + dim=size(rrax) + If (dim /= len) Then + Allocate(tmp(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if +!!$ write(0,*) 'IA: copying ',len,dim + if (.true.) then + do i=1, min(len,dim) + tmp(i)=rrax(i) + end do + else + tmp(1:min(len,dim))=rrax(1:min(len,dim)) + end if +!!$ write(0,*) 'IA: copying done' + Deallocate(rrax,stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + rrax=>tmp + End If + else +!!$ write(0,*) 'IA: allocating ',len + allocate(rrax(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + endif + if (present(pad)) then +!!$ write(0,*) 'IA: padding' + rrax(dim+1:len) = pad + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + + End Subroutine psb_dreallocate1i + + + + + + Subroutine psb_dreallocate1d(len,rrax,info,pad) + use psb_error_mod + + ! ...Subroutine Arguments + Integer,Intent(in) :: len + Real(kind(1.d0)),pointer :: rrax(:) + integer :: info + real(kind(1.d0)), optional, intent(in) :: pad + + ! ...Local Variables + Real(kind(1.d0)),Pointer :: tmp(:) + Integer :: dim,err_act,err,i, m + character(len=20) :: name + + name='psb_dreallocate1d' + call psb_erractionsave(err_act) + + if (associated(rrax)) then + dim=size(rrax) + + If (dim /= len) Then + Allocate(tmp(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + m = min(dim,len) +!!$ write(0,*) 'DA: copying ',min(len,dim) + if (.true.) then + do i=1,m + tmp(i) = rrax(i) + end do + else + tmp(1:m) = rrax(1:m) + end if +!!$ write(0,*) 'DA: copying done ',m + Deallocate(rrax,stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + rrax=>tmp + End If + else + dim = 0 + Allocate(rrax(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + endif + if (present(pad)) then + rrax(dim+1:len) = pad + endif + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + End Subroutine psb_dreallocate1d + + + + Subroutine psb_dreallocated2(len1,len2,rrax,info,pad) + use psb_error_mod + ! ...Subroutine Arguments + Integer,Intent(in) :: len1,len2 + Real(kind(1.d0)),pointer :: rrax(:,:) + integer :: info + real(kind(1.d0)), optional, intent(in) :: pad + + ! ...Local Variables + Real(kind(1.d0)),Pointer :: tmp(:,:) + Integer :: dim,err_act,err,i, m + character(len=20) :: name + + name='psb_dreallocated2' + call psb_erractionsave(err_act) + + if (associated(rrax)) then + dim=size(rrax,1) + + If (dim /= len1) Then + Allocate(tmp(len1,len2),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + m = min(dim,len1) +!!$ write(0,*) 'DA: copying ',min(len,dim) + if (.true.) then + do i=1,m + tmp(i,:) = rrax(i,:) + end do + else + tmp(1:m,:) = rrax(1:m,:) + end if +!!$ write(0,*) 'DA: copying done ',m + Deallocate(rrax,stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + rrax=>tmp + End If + else + dim = 0 + Allocate(rrax(len1,len2),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + endif + if (present(pad)) then + rrax(dim+1:len1,:) = pad + endif + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + End Subroutine psb_dreallocated2 + + + Subroutine psb_dreallocate2i(len,rrax,y,info,pad) + use psb_error_mod + ! ...Subroutine Arguments + + Integer,Intent(in) :: len + Integer,pointer :: rrax(:),y(:) + integer :: info + integer, optional, intent(in) :: pad + character(len=20) :: name + integer :: err_act, err + + name='psb_dreallocate2i' + call psb_erractionsave(err_act) + + info=0 + call psb_dreallocate1i(len,rrax,info,pad=pad) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + call psb_dreallocate1i(len,y,info,pad=pad) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + End Subroutine psb_dreallocate2i + + + + + Subroutine psb_dreallocate2i1d(len,rrax,y,z,info) + use psb_error_mod + ! ...Subroutine Arguments + Integer,Intent(in) :: len + Integer,pointer :: rrax(:),y(:) + Real(Kind(1.d0)),pointer :: z(:) + integer :: info + character(len=20) :: name + integer :: err_act, err + + name='psb_dreallocate2i1d' + call psb_erractionsave(err_act) + + + info = 0 + call psb_dreallocate1i(len,rrax,info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + call psb_dreallocate1i(len,y,info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + call psb_dreallocate1d(len,z,info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + End Subroutine psb_dreallocate2i1d + + + Subroutine psb_dreallocate1it(len,rrax,info,pad) + use psb_error_mod + ! ...Subroutine Arguments + Integer,Intent(in) :: len + Integer,pointer :: rrax(:) + integer :: info + integer, optional, intent(in) :: pad + ! ...Local Variables + Integer,Pointer :: tmp(:) + Integer :: dim,err_act,err + character(len=20) :: name + + name='psb_dreallocate1it' + call psb_erractionsave(err_act) + + info=0 + if (associated(rrax)) then + dim=size(rrax) + If (dim /= len) Then + Allocate(tmp(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if +!!$ write(0,*) 'IA: copying ',min(len,dim) + tmp(1:min(len,dim))=rrax(1:min(len,dim)) +!!$ write(0,*) 'IA: copying done' + Deallocate(rrax,stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + rrax=>tmp + End If + else + allocate(rrax(len),stat=info) + if (info /= 0) then + err=4000 + call psb_errpush(err,name) + goto 9999 + end if + endif + if (present(pad)) then +!!$ write(0,*) 'IA: padding' + rrax(dim+1:len) = pad + endif + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + End Subroutine psb_dreallocate1it + + +end module psb_realloc_mod diff --git a/src/modules/psb_serial_mod.f90 b/src/modules/psb_serial_mod.f90 new file mode 100644 index 00000000..56ecabb4 --- /dev/null +++ b/src/modules/psb_serial_mod.f90 @@ -0,0 +1,211 @@ +module psb_serial_mod + use psb_spmat_type + use psb_string_mod + + interface psb_csdp + subroutine psb_dcsdp(a, b,info,ifc,check,trans,unitd) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(inout) :: b + integer, intent(out) :: info + integer, intent(in), optional :: ifc + character, intent(in), optional :: check,trans,unitd + end subroutine psb_dcsdp + end interface + + interface psb_csrws + subroutine psb_dcsrws(rw,a,info,trans) + use psb_spmat_type + type(psb_dspmat_type) :: a + real(kind(1.d0)), pointer :: rw(:) + integer :: info + character, optional :: trans + end subroutine psb_dcsrws + end interface + + + + interface psb_cssm + subroutine psb_dcssm(alpha,t,b,beta,c,info,trans,unitd,d) + use psb_spmat_type + type(psb_dspmat_type) :: t + real(kind(1.d0)) :: alpha, beta, b(:,:), c(:,:) + integer :: info + character, optional :: trans, unitd + real(kind(1.d0)), optional, target :: d(:) + end subroutine psb_dcssm + subroutine psb_dcssv(alpha,t,b,beta,c,info,trans,unitd,d) + use psb_spmat_type + type(psb_dspmat_type) :: t + real(kind(1.d0)) :: alpha, beta, b(:), c(:) + integer :: info + character, optional :: trans, unitd + real(kind(1.d0)), optional, target :: d(:) + end subroutine psb_dcssv + end interface + + interface psb_csmm + subroutine psb_dcsmv(alpha,a,b,beta,c,info,trans) + use psb_spmat_type + type(psb_dspmat_type) :: a + real(kind(1.d0)) :: alpha, beta, b(:), c(:) + integer :: info + character, optional :: trans + end subroutine psb_dcsmv + subroutine psb_dcsmm(alpha,a,b,beta,c,info,trans) + use psb_spmat_type + type(psb_dspmat_type) :: a + real(kind(1.d0)) :: alpha, beta, b(:,:), c(:,:) + integer :: info + character, optional :: trans + end subroutine psb_dcsmm + end interface + + interface psb_fixcoo + subroutine psb_dfixcoo(a,info) + use psb_spmat_type + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + end subroutine psb_dfixcoo + end interface + + interface psb_ipcoo2csr + subroutine psb_dipcoo2csr(a,info,rwshr) + use psb_spmat_type + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + logical, optional :: rwshr + end subroutine psb_dipcoo2csr + end interface + + interface psb_ipcsr2coo + subroutine psb_dipcsr2coo(a,info) + use psb_spmat_type + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + end subroutine psb_dipcsr2coo + end interface + + interface psb_csprt + subroutine psb_dcsprt(iout,a,iv,irs,ics,head,ivr,ivc) + use psb_spmat_type + integer, intent(in) :: iout + type(psb_dspmat_type), intent(in) :: a + integer, intent(in), optional :: iv(:) + integer, intent(in), optional :: irs,ics + character(len=*), optional :: head + integer, intent(in), optional :: ivr(:),ivc(:) + end subroutine psb_dcsprt + end interface + + interface psb_spgtdiag + subroutine psb_dspgtdiag(a,d,info) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + real(kind(1.d0)), intent(inout) :: d(:) + integer, intent(out) :: info + end subroutine psb_dspgtdiag + end interface + + interface psb_spscal + subroutine psb_dspscal(a,d,info) + use psb_spmat_type + type(psb_dspmat_type), intent(inout) :: a + real(kind(1.d0)), intent(in) :: d(:) + integer, intent(out) :: info + end subroutine psb_dspscal + end interface + + + interface psb_spinfo + subroutine psb_dspinfo(ireq,a,ires,info,iaux) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + integer, intent(in) :: ireq + integer, intent(out) :: ires + integer, intent(out) :: info + integer, intent(in), optional :: iaux + end subroutine psb_dspinfo + end interface + + interface psb_spgtrow + subroutine psb_dspgtrow(irw,a,b,info,append,iren,lrw) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + integer, intent(in) :: irw + type(psb_dspmat_type), intent(inout) :: b + logical, intent(in), optional :: append + integer, intent(in), target, optional :: iren(:) + integer, intent(in), optional :: lrw + integer, intent(out) :: info + end subroutine psb_dspgtrow + end interface + + interface psb_neigh + subroutine psb_dneigh(a,idx,neigh,n,info,lev) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + integer, intent(in) :: idx + integer, intent(out) :: n + integer, pointer :: neigh(:) + integer, intent(out) :: info + integer, optional, intent(in) :: lev + end subroutine psb_dneigh + end interface + + interface psb_coins + subroutine psb_dcoins(nz,ia,ja,val,a,gtl,imin,imax,jmin,jmax,info) + use psb_spmat_type + integer, intent(in) :: nz, imin,imax,jmin,jmax + integer, intent(in) :: ia(:),ja(:),gtl(:) + real(kind(1.d0)), intent(in) :: val(:) + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + end subroutine psb_dcoins + end interface + + + interface psb_symbmm + subroutine psb_dsymbmm(a,b,c) + use psb_spmat_type + type(psb_dspmat_type) :: a,b,c + end subroutine psb_dsymbmm + end interface + + interface psb_numbmm + subroutine psb_dnumbmm(a,b,c) + use psb_spmat_type + type(psb_dspmat_type) :: a,b,c + end subroutine psb_dnumbmm + end interface + + interface psb_transp + subroutine psb_dtransp(a,b,c,fmt) + use psb_spmat_type + type(psb_dspmat_type) :: a,b + integer, optional :: c + character(len=*), optional :: fmt + end subroutine psb_dtransp + end interface + + interface psb_rwextd + subroutine psb_drwextd(nr,a,info,b) + use psb_spmat_type + integer, intent(in) :: nr + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: b + end subroutine psb_drwextd + end interface + + interface psb_csnmi + real(kind(1.d0)) function psb_dcsnmi(a,info,trans) + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + integer, intent(out) :: info + character, optional :: trans + end function psb_dcsnmi + end interface + +end module psb_serial_mod + diff --git a/src/modules/psb_sparse_mod.f90 b/src/modules/psb_sparse_mod.f90 new file mode 100644 index 00000000..c299c20e --- /dev/null +++ b/src/modules/psb_sparse_mod.f90 @@ -0,0 +1,16 @@ +module psb_sparse_mod + use psb_typedesc + use psb_typeprec + use psb_serial_mod + use psb_tools_mod + use psb_psblas_mod + use psb_prec_mod + use psb_methd_mod + use psb_error_mod + use psb_string +end module psb_sparse_mod + + + + + diff --git a/src/modules/psb_spmat_type.f90 b/src/modules/psb_spmat_type.f90 new file mode 100644 index 00000000..bb64931f --- /dev/null +++ b/src/modules/psb_spmat_type.f90 @@ -0,0 +1,341 @@ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!! Module to define D_SPMAT, structure !! +!! for sparse matrix. !! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +module psb_spmat_type + use psb_error_mod + use psb_realloc_mod + use psb_const_mod +! Typedef: psb_dspmat_type +! Contains a sparse matrix + type psb_dspmat_type + ! Rows & columns + integer :: m, k + ! Identify the representation method. Es: CSR, JAD, ... + character(len=5) :: fida + ! describe some chacteristics of sparse matrix + character(len=11) :: descra + ! Contains some additional informations on sparse matrix + integer :: infoa(10) + ! Contains sparse matrix coefficients + real(kind(1.d0)), pointer :: aspk(:)=>null() + ! Contains indeces that describes sparse matrix structure + integer, pointer :: ia1(:)=>null(), ia2(:)=>null() + ! Permutations matrix + integer, pointer :: pl(:)=>null(), pr(:)=>null() + end type psb_dspmat_type + + interface psb_nullify_sp + module procedure psb_nullify_dsp + end interface + + interface psb_spclone + module procedure psb_dspclone + end interface + + interface psb_spreall + module procedure psb_dspreallocate, psb_dspreall3 + end interface + + interface psb_spall + module procedure psb_dspallocate, psb_dspall3, psb_dspallmk, psb_dspallmknz + end interface + + interface psb_spfree + module procedure psb_dspfree + end interface + + interface psb_spreinit + module procedure psb_dspreinit + end interface + +contains + + subroutine psb_nullify_dsp(mat) + implicit none + type(psb_dspmat_type), intent(inout) :: mat + + nullify(mat%aspk,mat%ia1,mat%ia2,mat%pl,mat%pr) + mat%m=0 + mat%k=0 + mat%fida='' + mat%descra='' + + end subroutine psb_nullify_dsp + + Subroutine psb_dspreinit(a) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + + !locals + logical, parameter :: debug=.false. + + if (debug) write(0,*) 'spreinit init ',a%fida,a%infoa(psb_nnz_) + if (a%fida=='COO') a%infoa(psb_nnz_) = 0 + if (associated(a%aspk)) a%aspk(:) = 0.d0 + if (debug) write(0,*) 'spreinit end ',a%fida,a%infoa(psb_nnz_) + + end Subroutine psb_dspreinit + + Subroutine psb_dspallocate(a, nnz,info) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(in) :: nnz + integer, intent(out) :: info + + !locals + logical, parameter :: debug=.false. + + info = 0 + if (nnz.lt.0) then + info=45 + return + Endif + if (debug) write(0,*) 'SPALL : NNZ ',nnz,a%m,a%k + call psb_spreall(a,nnz,info) + + a%pl(1)=0 + a%pr(1)=0 + ! set INFOA fields + a%fida = 'COO' + a%descra = 'GUN' + a%infoa(:) = 0 + a%m = 0 + a%k = 0 + if (debug) write(0,*) 'SPALL : end' + Return + + End Subroutine psb_dspallocate + + Subroutine psb_dspallmk(m,k,a,info) + implicit none + !....Parameters... + + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(in) :: m,k + Integer, intent(out) :: info + + !locals + logical, parameter :: debug=.false. + integer :: nnz + + INFO = 0 + nnz = 2*max(1,m,k) + if (debug) write(0,*) 'SPALL : NNZ ',nnz,a%m,a%k + a%m=max(0,m) + a%k=max(0,k) + call psb_spreall(a,nnz,info) + + a%pl(1)=0 + a%pr(1)=0 + ! set INFOA fields + a%fida = 'COO' + a%descra = 'GUN' + a%infoa(:) = 0 + if (debug) write(0,*) 'SPALL : end' + Return + + end subroutine psb_dspallmk + + Subroutine psb_dspallmknz(m,k,a, nnz,info) + implicit none + !....parameters... + + type(psb_dspmat_type), intent(inout) :: a + integer, intent(in) :: m,k,nnz + integer, intent(out) :: info + + !locals + logical, parameter :: debug=.false. + + info = 0 + if (nnz.lt.0) then + info=45 + return + endif + if (debug) write(0,*) 'spall : nnz ',nnz,a%m,a%k + a%m=max(0,m) + a%k=max(0,k) + call psb_spreall(a,nnz,info) + + a%pl(1)=0 + a%pr(1)=0 + ! set infoa fields + a%fida = 'COO' + a%descra = 'GUN' + a%infoa(:) = 0 + if (debug) write(0,*) 'spall : end' + return + + end subroutine psb_dspallmknz + + + subroutine psb_dspall3(a, ni1,ni2,nd,info) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(in) :: ni1,ni2,nd + Integer, intent(out) :: info + + !locals + logical, parameter :: debug=.false. + + info = 0 + + call psb_spreall(a, ni1,ni2,nd,info) + + a%pl(1)=0 + a%pr(1)=0 + ! set INFOA fields + a%fida = 'COO' + a%descra = 'GUN' + a%infoa(:) = 0 + a%m = 0 + a%k = 0 + if (debug) write(0,*) 'SPALL : end' + Return + + End Subroutine psb_dspall3 + + + subroutine psb_dspreallocate(a, nnz,info,ifc) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(in) :: NNZ + Integer, intent(out) :: info + ! + ! ifc is used here to allocate space in IA1 for smart + ! regeneration. This probably ought to be changed, + ! by adding a new component to d_spmat, or by making + ! infoa a pointer. + ! + Integer, intent(in), optional :: ifc + integer :: ifc_ + + !locals + logical, parameter :: debug=.false. + + info = 0 + if (nnz.lt.0) then + info=45 + return + endif + if (present(ifc)) then + ifc_ = max(1,ifc) + else + ifc_ = 1 + endif + + if (ifc_ == 1) then + call psrealloc(nnz,a%ia1,a%ia2,a%aspk,info) + else + call psrealloc(nnz,a%aspk,info) + if (info /= 0) return + call psrealloc(nnz,a%ia2,info) + if (info /= 0) return + call psrealloc(ifc*nnz+200,a%ia1,info) + if (info /= 0) return + end if + if (info /= 0) return + call psrealloc(max(1,a%m),a%pl,info) + if (info /= 0) return + call psrealloc(max(1,a%k),a%pr,info) + if (info /= 0) return + + Return + + End Subroutine psb_dspreallocate + + subroutine psb_dspreall3(a, ni1,ni2,nd,info) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(in) :: ni1,ni2,nd + Integer, intent(inout) :: info + + !locals + logical, parameter :: debug=.false. + + info = 0 + call psrealloc(nd,a%aspk,info) + if (info /= 0) return + call psrealloc(ni2,a%ia2,info) + if (info /= 0) return + call psrealloc(ni1,a%ia1,info) + if (info /= 0) return + call psrealloc(max(1,a%m),a%pl,info) + if (info /= 0) return + call psrealloc(max(1,a%k),a%pr,info) + if (info /= 0) return + + Return + + End Subroutine psb_dspreall3 + + + subroutine psb_dspclone(a, b,info) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(in) :: A + Type(psb_dspmat_type), intent(out) :: B + Integer, intent(out) :: info + + !locals + Integer :: nza,nz1, nz2, nzl, nzr + logical, parameter :: debug=.false. + + INFO = 0 + + nza = size(a%aspk) + nz1 = size(a%ia1) + nz2 = size(a%ia2) + nzl = size(a%pl) + nzr = size(a%pr) + allocate(b%aspk(nza),b%ia1(nz1),b%ia2(nz2),& + & b%pl(nzl),b%pr(nzr),stat=info) + if (info /= 0) then + info=2023 + return + Endif + b%aspk(:) = a%aspk(:) + b%ia1(:) = a%ia1(:) + b%ia2(:) = a%ia2(:) + b%pl(:) = a%pl(:) + b%pr(:) = a%pr(:) + b%infoa(:) = a%infoa(:) + b%fida = a%fida + b%descra = a%descra + b%m = a%m + b%k = a%k + + Return + + End Subroutine psb_dspclone + + + subroutine psb_dspfree(a,info) + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(out) :: info + + !locals + logical, parameter :: debug=.false. + + INFO = 0 + + deallocate(a%aspk,a%ia1,a%ia2,a%pr,a%pl,STAT=INFO) + + call psb_nullify_sp(a) + + Return + + End Subroutine psb_dspfree + + +end module psb_spmat_type + diff --git a/src/modules/psb_string_mod.f90 b/src/modules/psb_string_mod.f90 new file mode 100644 index 00000000..3bbafc90 --- /dev/null +++ b/src/modules/psb_string_mod.f90 @@ -0,0 +1,20 @@ +module psb_string_mod + interface tolower + function tolowerc(string) + character(len=*), intent(in) :: string + character(len=len(string)) :: tolowerc + end function tolowerc + end interface + interface toupper + function toupperc(string) + character(len=*), intent(in) :: string + character(len=len(string)) :: toupperc + end function toupperc + end interface + interface touppers + subroutine sub_toupperc(string,strout) + character(len=*), intent(in) :: string + character(len=*), intent(out) :: strout + end subroutine sub_toupperc + end interface +end module psb_string_mod diff --git a/src/modules/psb_tools_mod.f90 b/src/modules/psb_tools_mod.f90 new file mode 100644 index 00000000..97ff1099 --- /dev/null +++ b/src/modules/psb_tools_mod.f90 @@ -0,0 +1,504 @@ +Module psb_tools_mod + use psb_const_mod + + interface psb_alloc + ! 2-D double precision version + subroutine psb_dalloc(m, n, x, desc_a, info, js) + use psb_descriptor_type + implicit none + integer, intent(in) :: m,n + real(kind(1.d0)), pointer :: x(:,:) + type(psb_desc_type), intent(inout) :: desc_a + integer :: info + integer, optional, intent(in) :: js + end subroutine psb_dalloc + ! 1-D double precision version + subroutine psb_dallocv(m, x, desc_a,info) + use psb_descriptor_type + integer, intent(in) :: m + real(kind(1.d0)), pointer :: x(:) + type(psb_desc_type), intent(in):: desc_a + integer :: info + end subroutine psb_dallocv + ! 2-D integer version + subroutine psb_ialloc(m, n, x, desc_a, info,js) + use psb_descriptor_type + integer, intent(in) :: m,n + integer, pointer :: x(:,:) + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + integer, optional, intent(in) :: js + end subroutine psb_ialloc + subroutine psb_iallocv(m, x, desc_a,info) + use psb_descriptor_type + integer, intent(in) :: m + integer, pointer :: x(:) + type(psb_desc_type), intent(in):: desc_a + integer :: info + end subroutine psb_iallocv + end interface + + + interface psb_asb + ! 2-D double precision version + subroutine psb_dasb(x, desc_a, info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), pointer :: x(:,:) + integer, intent(out) :: info + end subroutine psb_dasb + ! 1-D double precision version + subroutine psb_dasbv(x, desc_a, info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), pointer :: x(:) + integer, intent(out) :: info + end subroutine psb_dasbv + ! 2-D integer version + subroutine psb_iasb(x, desc_a, info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:,:) + integer, intent(out) :: info + end subroutine psb_iasb + ! 1-D integer version + subroutine psb_iasbv(x, desc_a, info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:) + integer, intent(out) :: info + end subroutine psb_iasbv + end interface + + interface psb_csrovr + Subroutine psb_dcsrovr(a,desc_a,blk,info,rwcnv,clcnv,outfmt) + use psb_descriptor_type + use psb_spmat_type + Type(psb_dspmat_type),Intent(in) :: a + Type(psb_dspmat_type),Intent(inout) :: blk + Type(psb_desc_type),Intent(in) :: desc_a + integer, intent(out) :: info + logical, optional, intent(in) :: rwcnv,clcnv + character(len=5), optional :: outfmt + end Subroutine psb_dcsrovr + end interface + + + interface psb_csrp + subroutine psb_dcsrp(trans,iperm,a, desc_a, info) + use psb_descriptor_type + use psb_spmat_type + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(inout) :: iperm(:), info + character, intent(in) :: trans + end subroutine psb_dcsrp + end interface + + + interface psb_descasb + Subroutine psb_descasb(n_ovr,desc_p,desc_a,a,& + & l_tmp_halo,l_tmp_ovr_idx,lworks,lworkr,info) + use psb_descriptor_type + use psb_spmat_type + type(psb_dspmat_type),intent(in) :: a + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_p + integer,intent(in) :: n_ovr + Integer, Intent(in) :: l_tmp_halo,l_tmp_ovr_idx + Integer, Intent(inout) :: lworks, lworkr + integer, intent(out) :: info + end Subroutine psb_descasb + end interface + + + interface psb_descprt + subroutine psb_descprt(iout,desc_p,glob,short) + use psb_const_mod + use psb_descriptor_type + implicit none + type(psb_desc_type), intent(in) :: desc_p + integer, intent(in) :: iout + logical, intent(in), optional :: glob,short + end subroutine psb_descprt + end interface + + + interface psb_free + ! 2-D double precision version + subroutine psb_dfree(x, desc_a, info) + use psb_descriptor_type + real(kind(1.d0)),pointer :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + end subroutine psb_dfree + ! 1-D double precision version + subroutine psb_dfreev(x, desc_a, info) + use psb_descriptor_type + real(kind(1.d0)),pointer :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + end subroutine psb_dfreev + ! 2-D integer version + subroutine psb_ifree(x, desc_a, info) + use psb_descriptor_type + integer,pointer :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + end subroutine psb_ifree + ! 1-D integer version + subroutine psb_ifreev(x, desc_a, info) + use psb_descriptor_type + integer, pointer :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + end subroutine psb_ifreev + end interface + + + interface psb_gelp + ! 2-D version + subroutine psb_dgelp(trans,iperm,x,desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(inout) :: x(:,:) + integer, intent(inout) :: iperm(:),info + character, intent(in) :: trans + end subroutine psb_dgelp + ! 1-D version + subroutine psb_dgelpv(trans,iperm,x,desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(inout) :: x(:) + integer, intent(inout) :: iperm(:), info + character, intent(in) :: trans + end subroutine psb_dgelpv + end interface + + + interface psb_ins + ! 2-D double precision version + subroutine psb_dins(m, n, x, ix, jx, blck, desc_a, info,& + & iblck, jblck) + use psb_descriptor_type + integer, intent(in) :: m,n + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:,:) + integer, intent(in) :: ix,jx + real(kind(1.d0)), intent(in) :: blck(:,:) + integer,intent(out) :: info + integer, optional, intent(in) :: iblck,jblck + end subroutine psb_dins + ! 2-D double precision square version + subroutine psb_dinsvm(m, x, ix, jx, blck, desc_a,info,& + & iblck) + use psb_descriptor_type + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:,:) + integer, intent(in) :: ix,jx + real(kind(1.d0)), intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + end subroutine psb_dinsvm + ! 1-D double precision version + subroutine psb_dinsvv(m, x, ix, blck, desc_a, info,& + & iblck,insflag) + use psb_descriptor_type + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:) + integer, intent(in) :: ix + real(kind(1.d0)), intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + integer, optional, intent(in) :: insflag + end subroutine psb_dinsvv + ! 2-D integer version + subroutine psb_iins(m, n, x, ix, jx, blck, desc_a, info,& + & iblck, jblck) + use psb_descriptor_type + integer, intent(in) :: m,n + type(psb_desc_type), intent(in) :: desc_a + integer,pointer :: x(:,:) + integer, intent(in) :: ix,jx + integer, intent(in) :: blck(:,:) + integer,intent(out) :: info + integer, optional, intent(in) :: iblck,jblck + end subroutine psb_iins + ! 2-D integer square version + subroutine psb_iinsvm(m, x, ix, jx, blck, desc_a,info,& + & iblck) + use psb_descriptor_type + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:,:) + integer, intent(in) :: ix,jx + integer, intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + end subroutine psb_iinsvm + ! 1-D integer version + subroutine psb_iinsvv(m, x, ix, blck, desc_a, info,& + & iblck,insflag) + use psb_descriptor_type + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:) + integer, intent(in) :: ix + integer, intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + integer, optional, intent(in) :: insflag + end subroutine psb_iinsvv + end interface + + + + interface psb_ptins + subroutine psb_dptins(ia,ja,blck,desc_a,info) + use psb_descriptor_type + use psb_spmat_type + implicit none + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(in) :: ia,ja + type(psb_dspmat_type), intent(in) :: blck + integer,intent(out) :: info + end subroutine psb_dptins + end interface + + interface psb_dscall + subroutine psb_dscall(m, n, parts, icontxt, desc_a, info) + use psb_descriptor_type + Integer, intent(in) :: M,N,ICONTXT + Type(psb_desc_type), intent(out) :: desc_a + integer, intent(out) :: info + end subroutine psb_dscall + end interface + + + interface psb_scalv + subroutine psb_dscalv(m, v, icontxt, desc_a, info, flag) + use psb_descriptor_type + Integer, intent(in) :: m,icontxt, v(:) + integer, intent(in), optional :: flag + integer, intent(out) :: info + Type(psb_desc_type), intent(out) :: desc_a + end subroutine psb_dscalv + end interface + + + interface psb_dscasb + subroutine psb_dscasb(desc_a,info) + use psb_descriptor_type + Type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + end subroutine psb_dscasb + end interface + + + + interface psb_dsccpy + subroutine psb_dsccpy(desc_out, desc_a, info) + use psb_descriptor_type + type(psb_desc_type), intent(out) :: desc_out + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dsccpy + end interface + + + interface psb_dscfree + subroutine psb_dscfree(desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + end subroutine psb_dscfree + end interface + + interface psb_dscins + subroutine psb_dscins(nz,ia,ja,desc_a,info,is,js) + use psb_descriptor_type + type(psb_desc_type), intent(inout) :: desc_a + Integer, intent(in) :: nz,IA(:),JA(:) + integer, intent(out) :: info + integer, intent(in), optional :: is,js + end subroutine psb_dscins + end interface + + + interface psb_dscov + Subroutine psb_dscov(a,desc_a,novr,desc_ov,info) + use psb_descriptor_type + Use psb_spmat_type + integer, intent(in) :: novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + Type(psb_desc_type), Intent(inout) :: desc_ov + integer, intent(out) :: info + end Subroutine psb_dscov + end interface + + + interface psb_dscren + subroutine psb_dscren(trans,iperm,desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(inout) :: iperm(:) + character, intent(in) :: trans + integer, intent(out) :: info + end subroutine psb_dscren + end interface + + interface psb_spalloc + subroutine psb_dspalloc(a, desc_a, info, nnz) + use psb_descriptor_type + use psb_spmat_type + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(out) :: a + integer, intent(out) :: info + integer, optional, intent(in) :: nnz + end subroutine psb_dspalloc + end interface + + interface psb_spasb + subroutine psb_dspasb(a,desc_a, info, afmt, up, dup) + use psb_descriptor_type + use psb_spmat_type + type(psb_dspmat_type), intent (inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer,optional, intent(in) :: dup + character, optional, intent(in) :: afmt*5, up + end subroutine psb_dspasb + end interface + + + interface psb_spcnv + subroutine psb_dspcnv(a,b,desc_a,info) + use psb_descriptor_type + use psb_spmat_type + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(out) :: b + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dspcnv + end interface + + + interface psb_spfree + subroutine psb_dspfree(a, desc_a,info) + use psb_descriptor_type + use psb_spmat_type + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) ::a + integer, intent(out) :: info + end subroutine psb_dspfree + end interface + + + interface psb_spins + subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,is,js) + use psb_descriptor_type + use psb_spmat_type + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(inout) :: a + integer, intent(in) :: nz,ia(:),ja(:) + real(kind(1.d0)), intent(in) :: val(:) + integer, intent(out) :: info + integer, intent(in), optional :: is,js + end subroutine psb_dspins + end interface + + + interface psb_sprn + subroutine psb_dsprn(a, desc_a,info) + use psb_descriptor_type + use psb_spmat_type + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + end subroutine psb_dsprn + end interface + + + interface psb_spupdate + subroutine psb_dspupdate(a, ia, ja, blck, desc_a,info,ix,jx,updflag) + use psb_descriptor_type + use psb_spmat_type + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) :: a + integer, intent(in) :: ia,ja + type(psb_dspmat_type), intent(in) :: blck + integer, intent(out) :: info + integer, optional, intent(in) :: ix,jx + integer, optional, intent(in) :: updflag + end subroutine psb_dspupdate + end interface + + interface psb_glob_to_loc + subroutine psb_glob_to_loc2(x,y,desc_a,info,iact) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer,intent(in) :: x(:) + integer,intent(out) :: y(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + end subroutine psb_glob_to_loc2 + subroutine psb_glob_to_loc(x,desc_a,info,iact) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer,intent(inout) :: x(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + end subroutine psb_glob_to_loc + end interface + + interface psb_loc_to_glob + subroutine psb_loc_to_glob2(x,y,desc_a,info,iact) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer,intent(in) :: x(:) + integer,intent(out) :: y(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + end subroutine psb_loc_to_glob2 + subroutine psb_loc_to_glob(x,desc_a,info,iact) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer,intent(inout) :: x(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + end subroutine psb_loc_to_glob + end interface + + + interface psb_ptasb + subroutine psb_ptasb(desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(inout) :: desc_a + integer,intent(out) :: info + end subroutine psb_ptasb + end interface + + + interface psb_dscrep + subroutine psb_dscrep(m, icontxt, desc_a,info) + use psb_descriptor_type + Integer, intent(in) :: m,icontxt + Type(psb_desc_type), intent(out) :: desc_a + integer, intent(out) :: info + end subroutine psb_dscrep + end interface + + interface psb_dscdec + subroutine psb_dscdec(nloc, icontxt, desc_a,info) + use psb_descriptor_type + Integer, intent(in) :: nloc,icontxt + Type(psb_desc_type), intent(out) :: desc_a + integer, intent(out) :: info + end subroutine psb_dscdec + end interface + + +end module psb_tools_mod diff --git a/src/modules/psi_mod.f90 b/src/modules/psi_mod.f90 new file mode 100644 index 00000000..da912aa5 --- /dev/null +++ b/src/modules/psi_mod.f90 @@ -0,0 +1,161 @@ +! Module containing interfaces for subroutine in SRC/F90/INTERNALS + +module psi_mod + + use psb_descriptor_type + + interface + subroutine psi_compute_size(desc_data,& + & index_in, dl_lda, info) + integer :: info, dl_lda + integer :: desc_data(:), index_in(:) + end subroutine psi_compute_size + end interface + + interface + subroutine psi_crea_bnd_elem(desc_a,info) + use psb_descriptor_type + type(psb_desc_type) :: desc_a + integer, intent(out) :: info + end subroutine psi_crea_bnd_elem + end interface + + interface + subroutine psi_crea_index(desc_a,index_in,index_out,glob_idx,info) + use psb_descriptor_type + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, intent(in) :: index_in(:) + integer, intent(out) :: index_out(:) + logical :: glob_idx + end subroutine psi_crea_index + end interface + + interface + subroutine psi_crea_ovr_elem(desc_overlap,ovr_elem) + integer :: desc_overlap(:) + integer, pointer :: ovr_elem(:) + end subroutine psi_crea_ovr_elem + end interface + + interface + subroutine psi_desc_index(desc_data,index_in,dep_list,& + & length_dl,loc_to_glob,glob_to_loc,desc_index,& + & isglob_in,info) + integer :: desc_data(:),index_in(:),dep_list(:) + integer :: loc_to_glob(:),glob_to_loc(:) + integer,pointer :: desc_index(:) + integer :: length_dl, info + logical :: isglob_in + end subroutine psi_desc_index + end interface + + interface + subroutine psi_sort_dl(dep_list,l_dep_list,np,info) + integer :: np,dep_list(:,:), l_dep_list(:), info + end subroutine psi_sort_dl + end interface + + interface psi_swapdata + subroutine psi_dswapdatam(flag,n,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag, n + integer, intent(out) :: info + real(kind(1.d0)) :: y(:,:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_dswapdatam + subroutine psi_dswapdatav(flag,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag + integer, intent(out) :: info + real(kind(1.d0)) :: y(:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_dswapdatav + subroutine psi_iswapdatam(flag,n,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag, n + integer, intent(out) :: info + integer :: y(:,:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_iswapdatam + subroutine psi_iswapdatav(flag,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag + integer, intent(out) :: info + integer :: y(:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_iswapdatav + end interface + + + interface psi_swaptran + subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag, n + integer, intent(out) :: info + real(kind(1.d0)) :: y(:,:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_dswaptranm + subroutine psi_dswaptranv(flag,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag + integer, intent(out) :: info + real(kind(1.d0)) :: y(:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_dswaptranv + subroutine psi_iswaptranm(flag,n,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag, n + integer, intent(out) :: info + integer :: y(:,:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_iswaptranm + subroutine psi_iswaptranv(flag,beta,y,desc_a,work,info) + use psb_descriptor_type + integer, intent(in) :: flag + integer, intent(out) :: info + integer :: y(:), beta, work(:) + type(psb_desc_type) :: desc_a + end subroutine psi_iswaptranv + end interface + + + interface psi_gth + subroutine psi_dgthm(n,k,idx,x,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: x(:,:), y(:) + end subroutine psi_dgthm + subroutine psi_dgthv(n,idx,x,y) + integer :: n, idx(:) + real(kind(1.d0)) :: x(:), y(:) + end subroutine psi_dgthv + subroutine psi_igthm(n,k,idx,x,y) + integer :: n, k, idx(:) + integer :: x(:,:), y(:) + end subroutine psi_igthm + subroutine psi_igthv(n,idx,x,y) + integer :: n, idx(:) + integer :: x(:), y(:) + end subroutine psi_igthv + end interface + + interface psi_sct + subroutine psi_dsctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:,:) + end subroutine psi_dsctm + subroutine psi_dsctv(n,idx,x,beta,y) + integer :: n, idx(:) + real(kind(1.d0)) :: beta, x(:), y(:) + end subroutine psi_dsctv + subroutine psi_isctm(n,k,idx,x,beta,y) + integer :: n, k, idx(:) + integer :: beta, x(:), y(:,:) + end subroutine psi_isctm + subroutine psi_isctv(n,idx,x,beta,y) + integer :: n, idx(:) + integer :: beta, x(:), y(:) + end subroutine psi_isctv + end interface + +end module psi_mod diff --git a/src/modules/sparker.fh b/src/modules/sparker.fh new file mode 100644 index 00000000..49b13e54 --- /dev/null +++ b/src/modules/sparker.fh @@ -0,0 +1,26 @@ + INTEGER MINJDROWS, MAXJDROWS + PARAMETER (MINJDROWS=4, MAXJDROWS=8) + DOUBLE PRECISION PERCENT + INTEGER DBLEINT_ + INTEGER DCMPLXINT_ +C ... This parameter represent sizeof(DOUBLE)/sizeof(INTEGER) ... + PARAMETER (PERCENT=0.7,DBLEINT_=2) + PARAMETER (DCMPLXINT_ = 4) + character fidef*5 + parameter (fidef='CSR') + integer, parameter :: nnz_=1 + integer, parameter :: del_bnd_=6, srtd_=7 + integer, parameter :: state_=8, upd_=9 + integer, parameter :: upd_pnt_=10, ifasize_=10 + integer, parameter :: spmat_null=0, spmat_bld=1 + integer, parameter :: spmat_asb=2, spmat_upd=4 + + integer perm_update + parameter (perm_update=98765) + integer isrtdcoo + parameter (isrtdcoo=98764) + integer ireg_flgs + parameter (ireg_flgs=10) + integer ip2_, iflag_, ipc_, ichk_, nnzt_, zero_ + parameter (ip2_=0, iflag_=2, ichk_=3) + parameter ( nnzt_=4, zero_=5,ipc_=6) diff --git a/src/prec/Makefile b/src/prec/Makefile new file mode 100644 index 00000000..9a3836a2 --- /dev/null +++ b/src/prec/Makefile @@ -0,0 +1,31 @@ +include ../../Make.inc + + +LIBDIR=../../lib/ +LIBNAME=$(LIBDIR)/$(F90LIB) + +HERE=. +MPFOBJS=dcslu.o psbdbldaggrmat.o +F90OBJS= dcsrsetup.o dcsrlu.o f90_psdprec.o \ + dprecbld.o zprecbld.o gps.o psdprecfree.o dprecset.o \ + psbdgenaggrmap.o $(MPFOBJS) +#dcoocp.o dcoocpadd.o dcoofact.o dcoolu.o dcooluadd.o\ + +COBJS=fort_slu_impl.o +INCDIRS=-I. -I.. -I$(LIBDIR) + +OBJS=$(F90OBJS) $(COBJS) + +lib: mpobjs $(OBJS) + ar -cur $(LIBNAME) $(OBJS) + ranlib $(LIBNAME) + +#$(F90OBJS): $(MODS) +mpobjs: + (make $(MPFOBJS) F90="$(MPF90)" F90COPT="$(F90COPT)") + +veryclean: clean + /bin/rm -f $(LIBNAME) + +clean: + /bin/rm -f $(OBJS) $(LOCAL_MODS) diff --git a/src/prec/fort_slu_impl.c b/src/prec/fort_slu_impl.c new file mode 100644 index 00000000..7a36855e --- /dev/null +++ b/src/prec/fort_slu_impl.c @@ -0,0 +1,333 @@ +/* This file is an interface to the SuperLU routines for sparse + factorization. It was obtaned by modifying the + c_fortran_dgssv.c file from the source distribution; + original copyright terms reproduced below. + + PSBLAS v 2.0, rc1, May 03, 2005 */ + + +/* ===================== + +Copyright (c) 2003, The Regents of the University of California, through +Lawrence Berkeley National Laboratory (subject to receipt of any required +approvals from U.S. Dept. of Energy) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +(1) Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +(2) Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of +Energy nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +/* + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + */ + +#ifdef Have_SLU_ +#include "dsp_defs.h" + +#define HANDLE_SIZE 8 +/* kind of integer to hold a pointer. Use int. + This might need to be changed on 64-bit systems. */ +typedef int fptr; /* 32-bit by default */ + +typedef struct { + SuperMatrix *L; + SuperMatrix *U; + int *perm_c; + int *perm_r; +} factors_t; + + +#else + +#include + +#endif + + +#ifdef Add_ +#define fort_slu_factor_ fort_slu_factor_ +#define fort_slu_solve_ fort_slu_solve_ +#define fort_slu_free_ fort_slu_free_ +#endif +#ifdef AddDouble_ +#define fort_slu_factor_ fort_slu_factor__ +#define fort_slu_solve_ fort_slu_solve__ +#define fort_slu_free_ fort_slu_free__ +#endif +#ifdef NoChange +#define fort_slu_factor_ fort_slu_factor +#define fort_slu_solve_ fort_slu_solve +#define fort_slu_free_ fort_slu_free +#endif + + + + +void +fort_slu_factor_(int *n, int *nnz, + double *values, int *rowind, int *colptr, +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs LU decomposition. + * + * f_factors (input/output) fptr* + * On output contains the pointer pointing to + * the structure of the factored matrices. + * + */ + +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + + + /* Set the default input options. */ + set_default_options(&options); + + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Adjust to 0-based indexing */ + for (i = 0; i < *nnz; ++i) --colptr[i]; + for (i = 0; i <= *n; ++i) --rowind[i]; + + dCreate_CompRow_Matrix(&A, *n, *n, *nnz, values, colptr, rowind, + SLU_NR, SLU_D, SLU_GE); + L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); + if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); + if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); + if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); + + /* + * Get column permutation vector perm_c[], according to permc_spec: + * permc_spec = 0: natural ordering + * permc_spec = 1: minimum degree on structure of A'*A + * permc_spec = 2: minimum degree on structure of A'+A + * permc_spec = 3: approximate minimum degree for unsymmetric matrices + */ + options.ColPerm=2; + permc_spec = options.ColPerm; + get_perm_c(permc_spec, &A, perm_c); + + sp_preorder(&options, &A, perm_c, etree, &AC); + + panel_size = sp_ienv(1); + relax = sp_ienv(2); + + dgstrf(&options, &AC, drop_tol, relax, panel_size, + etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); + + if ( *info == 0 ) { + Lstore = (SCformat *) L->Store; + Ustore = (NCformat *) U->Store; + dQuerySpace(L, U, &mem_usage); +#if 0 + printf("No of nonzeros in factor L = %d\n", Lstore->nnz); + printf("No of nonzeros in factor U = %d\n", Ustore->nnz); + printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); +#endif + } else { + printf("dgstrf() error returns INFO= %d\n", *info); + if ( *info <= *n ) { /* factorization completes */ + dQuerySpace(L, U, &mem_usage); + printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", + mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, + mem_usage.expansions); + } + } + + /* Restore to 1-based indexing */ + for (i = 0; i < *nnz; ++i) ++colptr[i]; + for (i = 0; i <= *n; ++i) ++rowind[i]; + + /* Save the LU factors in the factors handle */ + LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); + LUfactors->L = L; + LUfactors->U = U; + LUfactors->perm_c = perm_c; + LUfactors->perm_r = perm_r; + *f_factors = (fptr) LUfactors; + + /* Free un-wanted storage */ + SUPERLU_FREE(etree); + Destroy_SuperMatrix_Store(&A); + Destroy_CompCol_Permuted(&AC); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + +void +fort_slu_solve_(int *itrans, int *n, int *nrhs, + double *b, int *ldb, +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * performs triangular solve + * + */ +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + if (*itrans == 0) { + trans = NOTRANS; + } else if (*itrans ==1) { + trans = TRANS; + } else if (*itrans ==2) { + trans = CONJ; + } else { + trans = NOTRANS; + } + /* Initialize the statistics variables. */ + StatInit(&stat); + + /* Extract the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + L = LUfactors->L; + U = LUfactors->U; + perm_c = LUfactors->perm_c; + perm_r = LUfactors->perm_r; + + dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE); + /* Solve the system A*X=B, overwriting B with X. */ + dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); + + Destroy_SuperMatrix_Store(&B); + StatFree(&stat); +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif + +} + + +void +fort_slu_free_( +#ifdef Have_SLU_ + fptr *f_factors, /* a handle containing the address + pointing to the factored matrices */ +#else + void *f_factors, +#endif + int *info) + +{ +/* + * This routine can be called from Fortran. + * + * free all storage in the end + * + */ +#ifdef Have_SLU_ + SuperMatrix A, AC, B; + SuperMatrix *L, *U; + int *perm_r; /* row permutations from partial pivoting */ + int *perm_c; /* column permutation vector */ + int *etree; /* column elimination tree */ + SCformat *Lstore; + NCformat *Ustore; + int i, panel_size, permc_spec, relax; + trans_t trans; + double drop_tol = 0.0; + mem_usage_t mem_usage; + superlu_options_t options; + SuperLUStat_t stat; + factors_t *LUfactors; + + trans = NOTRANS; + /* Free the LU factors in the factors handle */ + LUfactors = (factors_t*) *f_factors; + SUPERLU_FREE (LUfactors->perm_r); + SUPERLU_FREE (LUfactors->perm_c); + Destroy_SuperNode_Matrix(LUfactors->L); + Destroy_CompCol_Matrix(LUfactors->U); + SUPERLU_FREE (LUfactors->L); + SUPERLU_FREE (LUfactors->U); + SUPERLU_FREE (LUfactors); + *info = 0; +#else + fprintf(stderr," SLU Not Configured, fix make.inc and recompile\n"); + *info=-1; +#endif +} + + diff --git a/src/prec/gps.f b/src/prec/gps.f new file mode 100644 index 00000000..ad02b7c4 --- /dev/null +++ b/src/prec/gps.f @@ -0,0 +1,576 @@ + SUBROUTINE REDUCE(NDSTK, NR, IOLD, RENUM, NDEG, LVL, LVLS1, + * LVLS2, CCSTOR, IBW2, IPF2) +C SUBROUTINE REDUCE DETERMINES A ROW AND COLUMN PERMUTATION WHICH, +C WHEN APPLIED TO A GIVEN SPARSE MATRIX, PRODUCES A PERMUTED +C MATRIX WITH A SMALLER BANDWIDTH AND PROFILE. +C THE INPUT ARRAY IS A CONNECTION TABLE WHICH REPRESENTS THE +C INDICES OF THE NONZERO ELEMENTS OF THE MATRIX, A. THE ALGO- +C RITHM IS DESCRIBED IN TERMS OF THE ADJACENCY GRAPH WHICH +C HAS THE CHARACTERISTIC THAT THERE IS AN EDGE (CONNECTION) +C BETWEEN NODES I AND J IF A(I,J) .NE. 0 AND I .NE. J. +C DIMENSIONING INFORMATION--THE FOLLOWING INTEGER ARRAYS MUST BE +C DIMENSIONED IN THE CALLING ROUTINE. +C NDSTK(NR,D1) D1 IS .GE. MAXIMUM DEGREE OF ALL NODES. +C IOLD(D2) D2 AND NR ARE .GE. THE TOTAL NUMBER OF +C RENUM(D2+1) NODES IN THE GRAPH. +C NDEG(D2) STORAGE REQUIREMENTS CAN BE SIGNIFICANTLY +C LVL(D2) DECREASED FOR IBM 360 AND 370 COMPUTERS +C LVLS1(D2) BY REPLACING INTEGER NDSTK BY +C LVLS2(D2) INTEGER*2 NDSTK IN SUBROUTINES REDUCE, +C CCSTOR(D2) DGREE, FNDIAM, TREE AND NUMBER. +C COMMON INFORMATION--THE FOLLOWING COMMON BLOCK MUST BE IN THE +C CALLING ROUTINE. +C COMMON/GRA/N,IDPTH,IDEG +C EXPLANATION OF INPUT VARIABLES-- +C NDSTK- CONNECTION TABLE REPRESENTING GRAPH. +C NDSTK(I,J)=NODE NUMBER OF JTH CONNECTION TO NODE +C NUMBER I. A CONNECTION OF A NODE TO ITSELF IS NOT +C LISTED. EXTRA POSITIONS MUST HAVE ZERO FILL. +C NR- ROW DIMENSION ASSIGNED NDSTK IN CALLING PROGRAM. +C IOLD(I)- NUMBERING OF ITH NODE UPON INPUT. +C IF NO NUMBERING EXISTS THEN IOLD(I)=I. +C N- NUMBER OF NODES IN GRAPH (EQUAL TO ORDER OF MATRIX). +C IDEG- MAXIMUM DEGREE OF ANY NODE IN THE GRAPH. +C EXPLANATION OF OUTPUT VARIABLES-- +C RENUM(I)- THE NEW NUMBER FOR THE ITH NODE. +C NDEG(I)- THE DEGREE OF THE ITH NODE. +C IBW2- THE BANDWIDTH AFTER RENUMBERING. +C IPF2- THE PROFILE AFTER RENUMBERING. +C IDPTH- NUMBER OF LEVELS IN REDUCE LEVEL STRUCTURE. +C THE FOLLOWING ONLY HAVE MEANING IF THE GRAPH WAS CONNECTED-- +C LVL(I)- INDEX INTO LVLS1 TO THE FIRST NODE IN LEVEL I. +C LVL(I+1)-LVL(I)= NUMBER OF NODES IN ITH LEVEL +C LVLS1- NODE NUMBERS LISTED BY LEVEL. +C LVLS2(I)- THE LEVEL ASSIGNED TO NODE I BY REDUCE. +C WORKING STORAGE VARIABLE-- +C CCSTOR +C LOCAL STORAGE-- +C COMMON/CC/-SUBROUTINES REDUCE, SORT2 AND PIKLVL ASSUME THAT +C THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. +C SUBROUTINE FNDIAM ASSUMES THAT THERE ARE AT MOST +C 100 NODES IN THE LAST LEVEL. +C COMMON/LVLW/-SUBROUTINES SETUP AND PIKLVL ASSUME THAT THERE +C ARE AT MOST 100 LEVELS. +C USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER NDSTK + INTEGER STNODE, RVNODE, RENUM, XC, SORT2, STNUM, CCSTOR, + * SIZE, STPT, SBNUM + COMMON /GRA/ N, IDPTH, IDEG +C IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. + COMMON /CC/ XC, SIZE(5000), STPT(5000) + COMMON /LVLW/ NHIGH(10000), NLOW(10000), NACUM(10000) + save gra, cc, lvlw + DIMENSION CCSTOR(1), IOLD(1) + DIMENSION NDSTK(NR,1), LVL(1), LVLS1(1), LVLS2(1), RENUM(1), + * NDEG(1) + IBW2 = 0 + IPF2 = 0 +C SET RENUM(I)=0 FOR ALL I TO INDICATE NODE I IS UNNUMBERED + DO 10 I=1,N + RENUM(I) = 0 + 10 CONTINUE +C COMPUTE DEGREE OF EACH NODE AND ORIGINAL BANDWIDTH AND PROFILE + CALL DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1) +C SBNUM= LOW END OF AVAILABLE NUMBERS FOR RENUMBERING +C STNUM= HIGH END OF AVAILABLE NUMBERS FOR RENUMBERING + SBNUM = 1 + STNUM = N +C NUMBER THE NODES OF DEGREE ZERO + DO 20 I=1,N + IF (NDEG(I).GT.0) GO TO 20 + RENUM(I) = STNUM + STNUM = STNUM - 1 + 20 CONTINUE +C FIND AN UNNUMBERED NODE OF MIN DEGREE TO START ON + 30 LOWDG = IDEG + 1 + NFLG = 1 + ISDIR = 1 + DO 40 I=1,N + IF (NDEG(I).GE.LOWDG) GO TO 40 + IF (RENUM(I).GT.0) GO TO 40 + LOWDG = NDEG(I) + STNODE = I + 40 CONTINUE +C FIND PSEUDO-DIAMETER AND ASSOCIATED LEVEL STRUCTURES. +C STNODE AND RVNODE ARE THE ENDS OF THE DIAM AND LVLS1 AND LVLS2 +C ARE THE RESPECTIVE LEVEL STRUCTURES. + CALL FNDIAM(STNODE, RVNODE, NDSTK, NR, NDEG, LVL, LVLS1, + * LVLS2, CCSTOR, IDFLT) + IF (NDEG(STNODE).LE.NDEG(RVNODE)) GO TO 50 +C NFLG INDICATES THE END TO BEGIN NUMBERING ON + NFLG = -1 + STNODE = RVNODE + 50 CALL GPS_SETUP(LVL, LVLS1, LVLS2) +C FIND ALL THE CONNECTED COMPONENTS (XC COUNTS THEM) + XC = 0 + LROOT = 1 + LVLN = 1 + DO 60 I=1,N + IF (LVL(I).NE.0) GO TO 60 + XC = XC + 1 + STPT(XC) = LROOT + CALL TREE(I, NDSTK, NR, LVL, CCSTOR, NDEG, LVLWTH, LVLBOT, + * LVLN, MAXLW, N) + SIZE(XC) = LVLBOT + LVLWTH - LROOT + LROOT = LVLBOT + LVLWTH + LVLN = LROOT + 60 CONTINUE + IF (SORT2(DMY).EQ.0) GO TO 70 + CALL PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR) +C ON RETURN FROM PIKLVL, ISDIR INDICATES THE DIRECTION THE LARGEST +C COMPONENT FELL. ISDIR IS MODIFIED NOW TO INDICATE THE NUMBERING +C DIRECTION. NUM IS SET TO THE PROPER VALUE FOR THIS DIRECTION. + 70 ISDIR = ISDIR*NFLG + NUM = SBNUM + IF (ISDIR.LT.0) NUM = STNUM + CALL NUMBER(STNODE, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLS1, + * LVL, NR, NFLG, IBW2, IPF2, CCSTOR, ISDIR) +C UPDATE STNUM OR SBNUM AFTER NUMBERING + IF (ISDIR.LT.0) STNUM = NUM + IF (ISDIR.GT.0) SBNUM = NUM + IF (SBNUM.LE.STNUM) GO TO 30 + IF (IBW2.LE.IBW1) RETURN +C IF ORIGINAL NUMBERING IS BETTER THAN NEW ONE, SET UP TO RETURN IT + DO 80 I=1,N + RENUM(I) = IOLD(I) + 80 CONTINUE + IBW2 = IBW1 + IPF2 = IPF1 + RETURN + END + SUBROUTINE DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1) +C DGREE COMPUTES THE DEGREE OF EACH NODE IN NDSTK AND STORES +C IT IN THE ARRAY NDEG. THE BANDWIDTH AND PROFILE FOR THE ORIGINAL +C OR INPUT RENUMBERING OF THE GRAPH IS COMPUTED ALSO. +C USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER NDSTK + COMMON /GRA/ N, IDPTH, IDEG + DIMENSION NDSTK(NR,1), NDEG(1), IOLD(1) + IBW1 = 0 + IPF1 = 0 + DO 40 I=1,N + NDEG(I) = 0 + IRW = 0 + DO 20 J=1,IDEG + ITST = NDSTK(I,J) + IF (ITST) 30, 30, 10 + 10 NDEG(I) = NDEG(I) + 1 + IDIF = IOLD(I) - IOLD(ITST) + IF (IRW.LT.IDIF) IRW = IDIF + 20 CONTINUE + 30 IPF1 = IPF1 + IRW + IF (IRW.GT.IBW1) IBW1 = IRW + 40 CONTINUE + RETURN + END + SUBROUTINE FNDIAM(SND1, SND2, NDSTK, NR, NDEG, LVL, LVLS1, + * LVLS2, IWK, IDFLT) +C FNDIAM IS THE CONTROL PROCEDURE FOR FINDING THE PSEUDO-DIAMETER OF +C NDSTK AS WELL AS THE LEVEL STRUCTURE FROM EACH END +C SND1- ON INPUT THIS IS THE NODE NUMBER OF THE FIRST +C ATTEMPT AT FINDING A DIAMETER. ON OUTPUT IT +C CONTAINS THE ACTUAL NUMBER USED. +C SND2- ON OUTPUT CONTAINS OTHER END OF DIAMETER +C LVLS1- ARRAY CONTAINING LEVEL STRUCTURE WITH SND1 AS ROOT +C LVLS2- ARRAY CONTAINING LEVEL STRUCTURE WITH SND2 AS ROOT +C IDFLT- FLAG USED IN PICKING FINAL LEVEL STRUCTURE, SET +C =1 IF WIDTH OF LVLS1 .LE. WIDTH OF LVLS2, OTHERWISE =2 +C LVL,IWK- WORKING STORAGE +C USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER NDSTK + INTEGER FLAG, SND, SND1, SND2 + COMMON /GRA/ N, IDPTH, IDEG +C IT IS ASSUMED THAT THE LAST LEVEL HAS AT MOST 100 NODES. + COMMON /CC/ NDLST(10001) + DIMENSION NDSTK(NR,1), NDEG(1), LVL(1), LVLS1(1), LVLS2(1), + * IWK(1) + FLAG = 0 + MTW2 = N + SND = SND1 +C ZERO LVL TO INDICATE ALL NODES ARE AVAILABLE TO TREE + 10 DO 20 I=1,N + LVL(I) = 0 + 20 CONTINUE + LVLN = 1 +C DROP A TREE FROM SND + CALL TREE(SND, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, LVLBOT, + * LVLN, MAXLW, MTW2) + IF (FLAG.GE.1) GO TO 50 + FLAG = 1 + 30 IDPTH = LVLN - 1 + MTW1 = MAXLW +C COPY LEVEL STRUCTURE INTO LVLS1 + DO 40 I=1,N + LVLS1(I) = LVL(I) + 40 CONTINUE + NDXN = 1 + NDXL = 0 + MTW2 = N +C SORT LAST LEVEL BY DEGREE AND STORE IN NDLST + CALL SORTDG(NDLST, IWK(LVLBOT), NDXL, LVLWTH, NDEG) + SND = NDLST(1) + GO TO 10 + 50 IF (IDPTH.GE.LVLN-1) GO TO 60 +C START AGAIN WITH NEW STARTING NODE + SND1 = SND + GO TO 30 + 60 IF (MAXLW.GE.MTW2) GO TO 80 + MTW2 = MAXLW + SND2 = SND +C STORE NARROWEST REVERSE LEVEL STRUCTURE IN LVLS2 + DO 70 I=1,N + LVLS2(I) = LVL(I) + 70 CONTINUE + 80 IF (NDXN.EQ.NDXL) GO TO 90 +C TRY NEXT NODE IN NDLST + NDXN = NDXN + 1 + SND = NDLST(NDXN) + GO TO 10 + 90 IDFLT = 1 + IF (MTW2.LE.MTW1) IDFLT = 2 + RETURN + END + SUBROUTINE TREE(IROOT, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, + * LVLBOT, LVLN, MAXLW, IBORT) +C TREE DROPS A TREE IN NDSTK FROM IROOT +C LVL- ARRAY INDICATING AVAILABLE NODES IN NDSTK WITH ZERO +C ENTRIES. TREE ENTERS LEVEL NUMBERS ASSIGNED +C DURING EXECUTION OF THIS PROCEDURE +C IWK- ON OUTPUT CONTAINS NODE NUMBERS USED IN TREE +C ARRANGED BY LEVELS (IWK(LVLN) CONTAINS IROOT +C AND IWK(LVLBOT+LVLWTH-1) CONTAINS LAST NODE ENTERED) +C LVLWTH- ON OUTPUT CONTAINS WIDTH OF LAST LEVEL +C LVLBOT- ON OUTPUT CONTAINS INDEX INTO IWK OF FIRST +C NODE IN LAST LEVEL +C MAXLW- ON OUTPUT CONTAINS THE MAXIMUM LEVEL WIDTH +C LVLN- ON INPUT THE FIRST AVAILABLE LOCATION IN IWK +C USUALLY ONE BUT IF IWK IS USED TO STORE PREVIOUS +C CONNECTED COMPONENTS, LVLN IS NEXT AVAILABLE LOCATION. +C ON OUTPUT THE TOTAL NUMBER OF LEVELS + 1 +C IBORT- INPUT PARAM WHICH TRIGGERS EARLY RETURN IF +C MAXLW BECOMES .GE. IBORT +C USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER NDSTK + DIMENSION NDSTK(NR,1), LVL(1), IWK(1), NDEG(1) + MAXLW = 0 + ITOP = LVLN + INOW = LVLN + LVLBOT = LVLN + LVLTOP = LVLN + 1 + LVLN = 1 + LVL(IROOT) = 1 + IWK(ITOP) = IROOT + 10 LVLN = LVLN + 1 + 20 IWKNOW = IWK(INOW) + NDROW = NDEG(IWKNOW) + DO 30 J=1,NDROW + ITEST = NDSTK(IWKNOW,J) + IF (LVL(ITEST).NE.0) GO TO 30 + LVL(ITEST) = LVLN + ITOP = ITOP + 1 + IWK(ITOP) = ITEST + 30 CONTINUE + INOW = INOW + 1 + IF (INOW.LT.LVLTOP) GO TO 20 + LVLWTH = LVLTOP - LVLBOT + IF (MAXLW.LT.LVLWTH) MAXLW = LVLWTH + IF (MAXLW.GE.IBORT) RETURN + IF (ITOP.LT.LVLTOP) RETURN + LVLBOT = INOW + LVLTOP = ITOP + 1 + GO TO 10 + END + SUBROUTINE SORTDG(STK1, STK2, X1, X2, NDEG) +C SORTDG SORTS STK2 BY DEGREE OF THE NODE AND ADDS IT TO THE END +C OF STK1 IN ORDER OF LOWEST TO HIGHEST DEGREE. X1 AND X2 ARE THE +C NUMBER OF NODES IN STK1 AND STK2 RESPECTIVELY. + INTEGER X1, X2, STK1, STK2, TEMP + COMMON /GRA/ N, IDPTH, IDEG + DIMENSION NDEG(1), STK1(1), STK2(1) + IND = X2 + 10 ITEST = 0 + IND = IND - 1 + IF (IND.LT.1) GO TO 30 + DO 20 I=1,IND + J = I + 1 + ISTK2 = STK2(I) + JSTK2 = STK2(J) + IF (NDEG(ISTK2).LE.NDEG(JSTK2)) GO TO 20 + ITEST = 1 + TEMP = STK2(I) + STK2(I) = STK2(J) + STK2(J) = TEMP + 20 CONTINUE + IF (ITEST.EQ.1) GO TO 10 + 30 DO 40 I=1,X2 + X1 = X1 + 1 + STK1(X1) = STK2(I) + 40 CONTINUE + RETURN + END + SUBROUTINE GPS_SETUP(LVL, LVLS1, LVLS2) +C SETUP COMPUTES THE REVERSE LEVELING INFO FROM LVLS2 AND STORES +C IT INTO LVLS2. NACUM(I) IS INITIALIZED TO NODES/ITH LEVEL FOR NODES +C ON THE PSEUDO-DIAMETER OF THE GRAPH. LVL IS INITIALIZED TO NON- +C ZERO FOR NODES ON THE PSEUDO-DIAM AND NODES IN A DIFFERENT +C COMPONENT OF THE GRAPH. + COMMON /GRA/ N, IDPTH, IDEG +C IT IS ASSUMED THAT THERE ARE AT MOST 100 LEVELS. + COMMON /LVLW/ NHIGH(10000), NLOW(10000), NACUM(10000) + DIMENSION LVL(1), LVLS1(1), LVLS2(1) + DO 10 I=1,IDPTH + NACUM(I) = 0 + 10 CONTINUE + DO 30 I=1,N + LVL(I) = 1 + LVLS2(I) = IDPTH + 1 - LVLS2(I) + ITEMP = LVLS2(I) + IF (ITEMP.GT.IDPTH) GO TO 30 + IF (ITEMP.NE.LVLS1(I)) GO TO 20 + NACUM(ITEMP) = NACUM(ITEMP) + 1 + GO TO 30 + 20 LVL(I) = 0 + 30 CONTINUE + RETURN + END + INTEGER FUNCTION SORT2(DMY) +C SORT2 SORTS SIZE AND STPT INTO DESCENDING ORDER ACCORDING TO +C VALUES OF SIZE. XC=NUMBER OF ENTRIES IN EACH ARRAY + INTEGER TEMP, CCSTOR, SIZE, STPT, XC +C IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS. + COMMON /CC/ XC, SIZE(5000), STPT(5000) + SORT2 = 0 + IF (XC.EQ.0) RETURN + SORT2 = 1 + IND = XC + 10 ITEST = 0 + IND = IND - 1 + IF (IND.LT.1) RETURN + DO 20 I=1,IND + J = I + 1 + IF (SIZE(I).GE.SIZE(J)) GO TO 20 + ITEST = 1 + TEMP = SIZE(I) + SIZE(I) = SIZE(J) + SIZE(J) = TEMP + TEMP = STPT(I) + STPT(I) = STPT(J) + STPT(J) = TEMP + 20 CONTINUE + IF (ITEST.EQ.1) GO TO 10 + RETURN + END + SUBROUTINE PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR) +C PIKLVL CHOOSES THE LEVEL STRUCTURE USED IN NUMBERING GRAPH +C LVLS1- ON INPUT CONTAINS FORWARD LEVELING INFO +C LVLS2- ON INPUT CONTAINS REVERSE LEVELING INFO +C ON OUTPUT THE FINAL LEVEL STRUCTURE CHOSEN +C CCSTOR- ON INPUT CONTAINS CONNECTED COMPONENT INFO +C IDFLT- ON INPUT =1 IF WDTH LVLS1.LE.WDTH LVLS2, =2 OTHERWISE +C NHIGH KEEPS TRACK OF LEVEL WIDTHS FOR HIGH NUMBERING +C NLOW- KEEPS TRACK OF LEVEL WIDTHS FOR LOW NUMBERING +C NACUM- KEEPS TRACK OF LEVEL WIDTHS FOR CHOSEN LEVEL STRUCTURE +C XC- NUMBER OF CONNECTED COMPONENTS +C SIZE(I)- SIZE OF ITH CONNECTED COMPONENT +C STPT(I)- INDEX INTO CCSTORE OF 1ST NODE IN ITH CON COMPT +C ISDIR- FLAG WHICH INDICATES WHICH WAY THE LARGEST CONNECTED +C COMPONENT FELL. =+1 IF LOW AND -1 IF HIGH + INTEGER CCSTOR, SIZE, STPT, XC, END + COMMON /GRA/ N, IDPTH, IDEG +C IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 COMPONENTS AND +C THAT THERE ARE AT MOST 100 LEVELS. + COMMON /LVLW/ NHIGH(10000), NLOW(10000), NACUM(10000) + COMMON /CC/ XC, SIZE(5000), STPT(5000) + DIMENSION LVLS1(1), LVLS2(1), CCSTOR(1) +C FOR EACH CONNECTED COMPONENT DO + DO 80 I=1,XC + J = STPT(I) + END = SIZE(I) + J - 1 +C SET NHIGH AND NLOW EQUAL TO NACUM + DO 10 K=1,IDPTH + NHIGH(K) = NACUM(K) + NLOW(K) = NACUM(K) + 10 CONTINUE +C UPDATE NHIGH AND NLOW FOR EACH NODE IN CONNECTED COMPONENT + DO 20 K=J,END + INODE = CCSTOR(K) + LVLNH = LVLS1(INODE) + NHIGH(LVLNH) = NHIGH(LVLNH) + 1 + LVLNL = LVLS2(INODE) + NLOW(LVLNL) = NLOW(LVLNL) + 1 + 20 CONTINUE + MAX1 = 0 + MAX2 = 0 +C SET MAX1=LARGEST NEW NUMBER IN NHIGH +C SET MAX2=LARGEST NEW NUMBER IN NLOW + DO 30 K=1,IDPTH + IF (2*NACUM(K).EQ.NLOW(K)+NHIGH(K)) GO TO 30 + IF (NHIGH(K).GT.MAX1) MAX1 = NHIGH(K) + IF (NLOW(K).GT.MAX2) MAX2 = NLOW(K) + 30 CONTINUE +C SET IT= NUMBER OF LEVEL STRUCTURE TO BE USED + IT = 1 + IF (MAX1.GT.MAX2) IT = 2 + IF (MAX1.EQ.MAX2) IT = IDFLT + IF (IT.EQ.2) GO TO 60 + IF (I.EQ.1) ISDIR = -1 +C COPY LVLS1 INTO LVLS2 FOR EACH NODE IN CONNECTED COMPONENT + DO 40 K=J,END + INODE = CCSTOR(K) + LVLS2(INODE) = LVLS1(INODE) + 40 CONTINUE +C UPDATE NACUM TO BE THE SAME AS NHIGH + DO 50 K=1,IDPTH + NACUM(K) = NHIGH(K) + 50 CONTINUE + GO TO 80 +C UPDATE NACUM TO BE THE SAME AS NLOW + 60 DO 70 K=1,IDPTH + NACUM(K) = NLOW(K) + 70 CONTINUE + 80 CONTINUE + RETURN + END + SUBROUTINE NUMBER(SND, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLST, + * LSTPT, NR, NFLG, IBW2, IPF2, IPFA, ISDIR) +C NUMBER PRODUCES THE NUMBERING OF THE GRAPH FOR MIN BANDWIDTH +C SND- ON INPUT THE NODE TO BEGIN NUMBERING ON +C NUM- ON INPUT AND OUTPUT, THE NEXT AVAILABLE NUMBER +C LVLS2- THE LEVEL STRUCTURE TO BE USED IN NUMBERING +C RENUM- THE ARRAY USED TO STORE THE NEW NUMBERING +C LVLST- ON OUTPUT CONTAINS LEVEL STRUCTURE +C LSTPT(I)- ON OUTPUT, INDEX INTO LVLST TO FIRST NODE IN ITH LVL +C LSTPT(I+1) - LSTPT(I) = NUMBER OF NODES IN ITH LVL +C NFLG- =+1 IF SND IS FORWARD END OF PSEUDO-DIAM +C =-1 IF SND IS REVERSE END OF PSEUDO-DIAM +C IBW2- BANDWIDTH OF NEW NUMBERING COMPUTED BY NUMBER +C IPF2- PROFILE OF NEW NUMBERING COMPUTED BY NUMBER +C IPFA- WORKING STORAGE USED TO COMPUTE PROFILE AND BANDWIDTH +C ISDIR- INDICATES STEP DIRECTION USED IN NUMBERING(+1 OR -1) +C USE INTEGER*2 NDSTK WITH AN IBM 360 OR 370. + INTEGER NDSTK + INTEGER SND, STKA, STKB, STKC, STKD, XA, XB, XC, XD, CX, END, + * RENUM, TEST + COMMON /GRA/ N, IDPTH, IDEG +C THE STORAGE IN COMMON BLOCKS CC AND LVLW IS NOW FREE AND CAN +C BE USED FOR STACKS. + COMMON /LVLW/ STKA(10000), STKB(10000), STKC(10000) + COMMON /CC/ STKD(10001) + DIMENSION IPFA(1) + DIMENSION NDSTK(NR,1), LVLS2(1), NDEG(1), RENUM(1), LVLST(1), + * LSTPT(1) +C SET UP LVLST AND LSTPT FROM LVLS2 + DO 10 I=1,N + IPFA(I) = 0 + 10 CONTINUE + write(0,*) 'NUMBER: initialization on NSTPT' + NSTPT = 1 + DO 30 I=1,IDPTH + LSTPT(I) = NSTPT + DO 20 J=1,N + IF (LVLS2(J).NE.I) GO TO 20 + LVLST(NSTPT) = J + NSTPT = NSTPT + 1 + 20 CONTINUE + 30 CONTINUE + LSTPT(IDPTH+1) = NSTPT + write(0,*) 'NUMBER: initialization completed', idpth,nstpt +C STKA, STKB, STKC AND STKD ARE STACKS WITH POINTERS +C XA,XB,XC, AND XD. CX IS A SPECIAL POINTER INTO STKC WHICH +C INDICATES THE PARTICULAR NODE BEING PROCESSED. +C LVLN KEEPS TRACK OF THE LEVEL WE ARE WORKING AT. +C INITIALLY STKC CONTAINS ONLY THE INITIAL NODE, SND. + LVLN = 0 + IF (NFLG.LT.0) LVLN = IDPTH + 1 + XC = 1 + STKC(XC) = SND + 40 CX = 1 + XD = 0 + LVLN = LVLN + NFLG + LST = LSTPT(LVLN) + LND = LSTPT(LVLN+1) - 1 +C BEGIN PROCESSING NODE STKC(CX) + 50 IPRO = STKC(CX) + RENUM(IPRO) = NUM + NUM = NUM + ISDIR + END = NDEG(IPRO) + XA = 0 + XB = 0 +C CHECK ALL ADJACENT NODES + DO 80 I=1,END +c$$$ write(0,*) 'NUMBER: loop 80 ',i,end, lvln + TEST = NDSTK(IPRO,I) + INX = RENUM(TEST) +C ONLY NODES NOT NUMBERED OR ALREADY ON A STACK ARE ADDED + IF (INX.EQ.0) GO TO 60 + IF (INX.LT.0) GO TO 80 +C DO PRELIMINARY BANDWIDTH AND PROFILE CALCULATIONS + NBW = (RENUM(IPRO)-INX)*ISDIR + IF (ISDIR.GT.0) INX = RENUM(IPRO) + IF (IPFA(INX).LT.NBW) IPFA(INX) = NBW + GO TO 80 + 60 RENUM(TEST) = -1 +C PUT NODES ON SAME LEVEL ON STKA, ALL OTHERS ON STKB + IF (LVLS2(TEST).EQ.LVLS2(IPRO)) GO TO 70 + XB = XB + 1 + if (xb>10000) write(0,*) 'XB>10000 in NUMBER' + STKB(XB) = TEST + GO TO 80 + 70 XA = XA + 1 + if (xa>10000) write(0,*) 'XA>10000 in NUMBER' + STKA(XA) = TEST + 80 CONTINUE +C SORT STKA AND STKB INTO INCREASING DEGREE AND ADD STKA TO STKC +C AND STKB TO STKD + IF (XA.EQ.0) GO TO 100 + IF (XA.EQ.1) GO TO 90 + CALL SORTDG(STKC, STKA, XC, XA, NDEG) + GO TO 100 + 90 XC = XC + 1 + if (xc>10000) write(0,*) 'XC>10000 in NUMBER' + STKC(XC) = STKA(XA) + + 100 IF (XB.EQ.0) GO TO 120 + IF (XB.EQ.1) GO TO 110 + CALL SORTDG(STKD, STKB, XD, XB, NDEG) + GO TO 120 + 110 XD = XD + 1 + if (xd>10000) write(0,*) 'XD>10000 in NUMBER' + STKD(XD) = STKB(XB) +C BE SURE TO PROCESS ALL NODES IN STKC + 120 CX = CX + 1 + if (cx>10000) write(0,*) 'CX>10000 in NUMBER' + IF (XC.GE.CX) GO TO 50 +C WHEN STKC IS EXHAUSTED LOOK FOR MIN DEGREE NODE IN SAME LEVEL +C WHICH HAS NOT BEEN PROCESSED + MAX = IDEG + 1 + SND = N + 1 + DO 130 I=LST,LND + TEST = LVLST(I) + IF (RENUM(TEST).NE.0) GO TO 130 + IF (NDEG(TEST).GE.MAX) GO TO 130 + RENUM(SND) = 0 + RENUM(TEST) = -1 + MAX = NDEG(TEST) + SND = TEST + 130 CONTINUE + IF (SND.EQ.N+1) GO TO 140 + XC = XC + 1 + if (xc>10000) write(0,*) 'XC>10000 ...2... in NUMBER' + STKC(XC) = SND + GO TO 50 +C IF STKD IS EMPTY WE ARE DONE, OTHERWISE COPY STKD ONTO STKC +C AND BEGIN PROCESSING NEW STKC + 140 IF (XD.EQ.0) GO TO 160 + DO 150 I=1,XD + STKC(I) = STKD(I) + 150 CONTINUE + XC = XD + GO TO 40 +C DO FINAL BANDWIDTH AND PROFILE CALCULATIONS + 160 DO 170 I=1,N + IF (IPFA(I).GT.IBW2) IBW2 = IPFA(I) + IPF2 = IPF2 + IPFA(I) + 170 CONTINUE + RETURN + END diff --git a/src/prec/psb_dbldaggrmat.f90 b/src/prec/psb_dbldaggrmat.f90 new file mode 100644 index 00000000..679c95e7 --- /dev/null +++ b/src/prec/psb_dbldaggrmat.f90 @@ -0,0 +1,918 @@ +subroutine psb_dbldaggrmat(a,desc_a,p,info) + use psb_serial_mod + use psb_prec_type + use psb_descriptor_type + use psb_spmat_type + use psb_tools_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_dspmat_type), intent(in), target :: a + type(psb_dbaseprec), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + logical, parameter :: aggr_dump=.false. + integer ::icontxt,nprow,npcol,me,mycol, err_act + character(len=20) :: name, ch_err + name='psb_dbldaggrmat' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + select case (p%iprcparm(smth_kind_)) + case (no_smth_) + + call raw_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='raw_aggregate') + goto 9999 + end if + if (aggr_dump) call psb_csprt(90+me,p%av(ac_),head='% Raw aggregate.') + + case(smth_omg_,smth_biz_) + + call smooth_aggregate(info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='smooth_aggregate') + goto 9999 + end if + case default + call psb_errpush(4010,name,a_err=name) + goto 9999 + + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine raw_aggregate(info) + use psb_prec_type + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + include 'mpif.h' + integer, intent(out) :: info + type(psb_dspmat_type), pointer :: bg + type(psb_dspmat_type) :: b, tmp + integer, pointer :: nzbr(:), idisp(:) + integer :: icontxt, nrow, nglob, ncol, ntaggr, nzbg, ip, ndx,& + & naggr, np, myprow, mypcol, nprows, npcols,nzt,irs,jl,nzl,nlr,& + & icomm,naggrm1, mtype, i, j, err_act + name='raw_aggregate' + info=0 + call psb_erractionsave(err_act) + + bg => p%av(ac_) + + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprows,npcols,myprow,mypcol) + np = nprows*npcols + nglob = desc_a%matrix_data(m_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + naggr = p%nlaggr(myprow+1) + ntaggr = sum(p%nlaggr) + allocate(nzbr(np), idisp(np),stat=info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + naggrm1=sum(p%nlaggr(1:myprow)) + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + do i=1, nrow + p%mlia(i) = p%mlia(i) + naggrm1 + end do + call psb_halo(p%mlia,desc_a,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_halo') + goto 9999 + end if + end if + + + call psb_spinfo(nztotreq,a,nzt,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='spinfo') + goto 9999 + end if + + call psb_spall(b,nzt,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + b%infoa(upd_) = 6 + b%fida = 'COO' + b%m=a%m + b%k=a%k + if (.false.) then + call psb_csdp(a,b,info) + if(info /= 0) then + info=4010 + ch_err='psb_csdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_spinfo(nztotreq,b,nzt,info) + if(info /= 0) then + info=4010 + ch_err='psb_spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + do i=1, nzt + b%ia1(i) = p%mlia(b%ia1(i)) + b%ia2(i) = p%mlia(b%ia2(i)) + enddo + + else + ! Ok, this is extremely dirty because we use pointers from + ! one sparse matrix into another. But it gives us something + ! in term of performance + jl = 0 + do i=1,a%m,50 + nlr = min(a%m-i+1,50) + call psb_spgtrow(i,a,b,info,append=.true.,iren=p%mlia,lrw=i+nlr-1) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spgtrow') + goto 9999 + end if + + call psb_spinfo(nztotreq,b,nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spinfo') + goto 9999 + end if + nzl = nzl - jl + tmp%fida = 'COO' + tmp%infoa(nnz_) = nzl + tmp%aspk => b%aspk(jl+1:jl+nzl) + tmp%ia1 => b%ia1(jl+1:jl+nzl) + tmp%ia2 => b%ia2(jl+1:jl+nzl) + call psb_fixcoo(tmp,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_fixcoo') + goto 9999 + end if + nzl = tmp%infoa(nnz_) + b%infoa(nnz_) = jl+nzl + jl = jl + nzl + enddo + end if + + + call psb_fixcoo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='fixcoo') + goto 9999 + end if + + irs = b%infoa(nnz_) + call psb_spreall(b,irs,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spreall') + goto 9999 + end if + b%m = naggr + b%k = naggr + + if (p%iprcparm(coarse_mat_) == mat_repl_) then + + call psb_dscrep(ntaggr,icontxt,p%desc_data,info) + + nzbr(:) = 0 + nzbr(myprow+1) = irs + call igsum2d(icontxt,'All',' ',np,1,nzbr,np,-1,-1) + nzbg = sum(nzbr) + call psb_spall(ntaggr,ntaggr,bg,nzbg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + call blacs_get(icontxt,10,icomm ) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(myprow+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,bg%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,bg%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,bg%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + bg%m = ntaggr + bg%k = ntaggr + bg%infoa(nnz_) = nzbg + bg%fida='COO' + bg%descra='G' + call psb_fixcoo(bg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='fixcoo') + goto 9999 + end if + + call psb_spfree(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spfree') + goto 9999 + end if + + else if (p%iprcparm(coarse_mat_) == mat_distr_) then + + call psb_dscdec(naggr,icontxt,p%desc_data,info) + call psb_spclone(b,bg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_spfree(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spfree') + goto 9999 + end if + + else + + write(0,*) 'Unknown p%iprcparm(coarse_mat) in aggregate_sp',p%iprcparm(coarse_mat_) + end if + + + deallocate(nzbr,idisp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine raw_aggregate + + + + subroutine smooth_aggregate(info) + use psb_serial_mod + use psb_tools_mod + use psb_error_mod + implicit none + include 'mpif.h' + + integer, intent(out) :: info + + type(psb_dspmat_type), pointer :: bg + type(psb_dspmat_type) :: b + integer, pointer :: nzbr(:), idisp(:), ivall(:) + integer :: icontxt, nrow, nglob, ncol, ntaggr, nzbg, ip, ndx,& + & naggr, np, myprow, mypcol, nprows, npcols,& + & icomm, naggrm1,naggrp1,mtype,i,j,err_act,k,nzl,itemp(1),jtemp(1) + type(psb_dspmat_type), pointer :: am1,am2 + type(psb_dspmat_type) :: am3,am4 + logical :: ml_global_nmb + + logical, parameter :: test_dump=.false. + integer, parameter :: ncmax=16 + real(kind(1.d0)) :: omega, anorm, tmp, dg + real(kind(1.d0)), parameter :: one=1.0d0, zero=0.0d0 + character(len=20) :: name, ch_err + + + name='smooth_aggregate' + info=0 + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprows,npcols,myprow,mypcol) + + bg => p%av(ac_) + + am2 => p%av(sm_pr_t_) + am1 => p%av(sm_pr_) + + + np = nprows*npcols + nglob = desc_a%matrix_data(m_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + naggr = p%nlaggr(myprow+1) + ntaggr = sum(p%nlaggr) + + allocate(nzbr(np), idisp(np),stat=info) + + + naggrm1 = sum(p%nlaggr(1:myprow)) + naggrp1 = sum(p%nlaggr(1:myprow+1)) + + ml_global_nmb = ( (p%iprcparm(smth_kind_) == smth_omg_).or.& + & ( (p%iprcparm(smth_kind_) == smth_biz_).and.& + & (p%iprcparm(coarse_mat_) == mat_repl_)) ) + + + if (ml_global_nmb) then + p%mlia(1:nrow) = p%mlia(1:nrow) + naggrm1 + call psb_halo(p%mlia,desc_a,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='f90_pshalo') + goto 9999 + end if + end if + + if (aggr_dump) then + open(30+me) + write(30+me,*) '% Aggregation map' + do i=1,ncol + write(30+me,*) i,p%mlia(i) + end do + close(30+me) + end if + + ! naggr: number of local aggregates + ! nrow: local rows. + ! + allocate(p%dorig(nrow),stat=info) + if (info/=0) then + write(0,*) 'Error from allocation',info + endif + + ! Get diagonal D + call psb_spgtdiag(a,p%dorig,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spgtdiag') + goto 9999 + end if + + where (p%dorig /= zero) + p%dorig = one / p%dorig + elsewhere + p%dorig = one + end where + + + ! 1. Allocate Ptilde in sparse matrix form + call psb_spall(am4,ncol,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + if (ml_global_nmb) then + do i=1,ncol + am4%aspk(i) = one + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(nnz_) = ncol + else + do i=1,nrow + am4%aspk(i) = one + am4%ia1(i) = i + am4%ia2(i) = p%mlia(i) + end do + am4%infoa(nnz_) = nrow + endif + am4%fida='COO' + am4%m=ncol + + if (ml_global_nmb) then + am4%k=ntaggr + else + am4%k=naggr + endif + + + if (test_dump) call & + & csprt(20+me,am4,head='% Operator Ptilde.',ivr=desc_a%loc_to_glob) + + + call psb_ipcoo2csr(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcoo2csr') + goto 9999 + end if + + call psb_spclone(a,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + + ! + ! WARNING: the cycles below assume that AM3 does have + ! its diagonal elements stored explicitly!!! + ! Should we swicth to something safer? + ! + call psb_spscal(am3,p%dorig,info) + if(info /= 0) goto 9999 + + if (p%iprcparm(om_choice_) == lib_choice_) then + + if (p%iprcparm(smth_kind_) == smth_biz_) then + + ! + ! This only works with CSR. + ! + anorm = 0.d0 + do i=1,am3%m + tmp = 0.d0 + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) <= am3%m) then + tmp = tmp + dabs(am3%aspk(j)) + endif + if (am3%ia1(j) == i ) then + dg = dabs(am3%aspk(j)) + end if + end do + anorm = max(anorm,tmp/dg) + enddo + + call dgamx2d(icontxt,'All',' ',1,1,anorm,1,itemp,jtemp,-1,-1,-1) + else + anorm = f90_psnrmi(am3,desc_a,info) + endif + omega = 4.d0/(3.d0*anorm) + p%dprcparm(smooth_omega_) = omega + + else if (p%iprcparm(om_choice_) == user_choice_) then + + omega = p%dprcparm(smooth_omega_) + + else if (p%iprcparm(om_choice_) /= user_choice_) then + write(0,*) me,'Error: invalid choice for OMEGA in blaggrmat?? ',& + & p%iprcparm(om_choice_) + end if + + + if (am3%fida=='CSR') then + do i=1,am3%m + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) == i) then + am3%aspk(j) = one - omega*am3%aspk(j) + else + am3%aspk(j) = - omega*am3%aspk(j) + end if + end do + end do + else if (am3%fida=='COO') then + do j=1,am3%infoa(nnz_) + if (am3%ia1(j) /= am3%ia2(j)) then + am3%aspk(j) = - omega*am3%aspk(j) + else + am3%aspk(j) = one - omega*am3%aspk(j) + endif + end do + else + write(0,*) 'Missing implementation of I sum' + call psb_errpush(4010,name) + goto 9999 + end if + + if (test_dump) call csprt(40+me,am3,head='% (I-wDA)',ivr=desc_a%loc_to_glob,& + & ivc=desc_a%loc_to_glob) + ! + ! Symbmm90 does the allocation for its result. + ! + ! am1 = (i-wDA)Ptilde + ! Doing it this way means to consider diag(Ai) + ! + ! + call symbmm90(am3,am4,am1) + call numbmm90(am3,am4,am1) + + + call psb_spfree(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spfree') + goto 9999 + end if + + if (ml_global_nmb) then + ! + ! Now we have to gather the halo of am1, and add it to itself + ! to multiply it by A, + ! + call psb_csrovr(am1,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_csrovr') + goto 9999 + end if + + call psb_rwextd(ncol,am1,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + + call psb_spfree(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spfree') + goto 9999 + end if + + else + + call psb_rwextd(ncol,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='rwextd') + goto 9999 + end if + endif + + if (test_dump) & + & call psb_csprt(60+me,am1,head='% (I-wDA)Pt',ivr=desc_a%loc_to_glob) + + call symbmm90(a,am1,am3) + call numbmm90(a,am1,am3) + + if (p%iprcparm(smth_kind_) == smth_omg_) then + call psb_transp(am1,am2,fmt='COO') + nzl = am2%infoa(nnz_) + i=0 + ! + ! Now we have to fix this. The only rows of B that are correct + ! are those corresponding to "local" aggregates, i.e. indices in p%mlia(:) + ! + do k=1, nzl + if ((naggrm1 < am2%ia1(k)) .and.(am2%ia1(k) <= naggrp1)) then + i = i+1 + am2%aspk(i) = am2%aspk(k) + am2%ia1(i) = am2%ia1(k) + am2%ia2(i) = am2%ia2(k) + end if + end do + + am2%infoa(nnz_) = i + call psb_ipcoo2csr(am2,info) + else + call psb_transp(am1,am2) + endif + + if (p%iprcparm(smth_kind_) == smth_omg_) then + ! am2 = ((i-wDA)Ptilde)^T + call psb_csrovr(am3,desc_a,am4,info,clcnv=.false.) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_csrovr') + goto 9999 + end if + call psb_rwextd(ncol,am3,info,b=am4) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + call psb_spfree(am4,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spfree') + goto 9999 + end if + + else if (p%iprcparm(smth_kind_) == smth_biz_) then + + call psb_rwextd(ncol,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_rwextd') + goto 9999 + end if + endif + + call symbmm90(am2,am3,b) + call numbmm90(am2,am3,b) + +!!$ if (aggr_dump) call csprt(50+me,am1,head='% Operator PTrans.') + call psb_spfree(am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spfree') + goto 9999 + end if + + call psb_ipcsr2coo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='ipcsr2coo') + goto 9999 + end if + + call psb_fixcoo(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='fixcoo') + goto 9999 + end if + + + if (test_dump) call psb_csprt(80+me,b,head='% Smoothed aggregate AC.') + + select case(p%iprcparm(smth_kind_)) + + case(smth_omg_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_spclone(b,bg,info) + if(info /= 0) goto 9999 + nzbg = bg%infoa(nnz_) + nzl = bg%infoa(nnz_) + + allocate(ivall(ntaggr)) + + i = 1 + do ip=1,nprows + do k=1, p%nlaggr(ip) + ivall(i) = ip + i = i + 1 + end do + end do + call psb_dscall(ntaggr,ivall,icontxt,p%desc_data,info,flag=1) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_dscall') + goto 9999 + end if + + + call psb_dscins(nzl,bg%ia1,bg%ia2,p%desc_data,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_dscins') + goto 9999 + end if + + call psb_dscasb(p%desc_data,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_dscasb') + goto 9999 + end if + + + + call psb_glob_to_loc(bg%ia1(1:nzl),p%desc_data,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + call psb_glob_to_loc(bg%ia2(1:nzl),p%desc_data,info,iact='I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psglob_to_loc') + goto 9999 + end if + + + bg%m=p%desc_data%matrix_data(psb_n_row_) + bg%k=p%desc_data%matrix_data(psb_n_col_) + + call psb_spfree(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spfree') + goto 9999 + end if + + + deallocate(ivall,nzbr,idisp) + + ! Split BG=M+N N off-diagonal part + call psb_spall(bg%m,bg%k,p%av(ap_nd_),nzl,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spall') + goto 9999 + end if + + k=0 + do i=1,nzl + if (bg%ia2(i)>bg%m) then + k = k + 1 + p%av(ap_nd_)%aspk(k) = bg%aspk(i) + p%av(ap_nd_)%ia1(k) = bg%ia1(i) + p%av(ap_nd_)%ia2(k) = bg%ia2(i) + endif + enddo + p%av(ap_nd_)%infoa(nnz_) = k + call psb_ipcoo2csr(p%av(ap_nd_),info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + call igsum2d(icontxt,'All',' ',1,1,k,1,-1,-1) + + if (k == 0) then + ! If the off diagonal part is emtpy, there's no point + ! in doing multiple Jacobi sweeps. This is certain + ! to happen when running on a single processor. + p%iprcparm(jac_sweeps_) = 1 + end if + + + if (np>1) then + call psb_spinfo(nztotreq,am1,nzl,info) + call psb_glob_to_loc(am1%ia1(1:nzl),p%desc_data,info,'I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + endif + am1%k=p%desc_data%matrix_data(psb_n_col_) + + if (np>1) then + call psb_ipcsr2coo(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcsr2coo') + goto 9999 + end if + + nzl = am2%infoa(nnz_) + call psb_glob_to_loc(am2%ia1(1:nzl),p%desc_data,info,'I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + + call psb_ipcoo2csr(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_ipcoo2csr') + goto 9999 + end if + end if + am2%m=p%desc_data%matrix_data(psb_n_col_) + + case(mat_repl_) + ! + ! + nzbr(:) = 0 + nzbr(myprow+1) = b%infoa(nnz_) + + call psb_dscrep(ntaggr,icontxt,p%desc_data,info) + + call igsum2d(icontxt,'All',' ',np,1,nzbr,np,-1,-1) + nzbg = sum(nzbr) + call psb_spall(ntaggr,ntaggr,bg,nzbg,info) + if(info /= 0) goto 9999 + + + call blacs_get(icontxt,10,icomm ) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(myprow+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,bg%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,bg%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,bg%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) goto 9999 + + + bg%m = ntaggr + bg%k = ntaggr + bg%infoa(nnz_) = nzbg + bg%fida='COO' + bg%descra='G' + call psb_fixcoo(bg,info) + if(info /= 0) goto 9999 + call psb_spfree(b,info) + if(info /= 0) goto 9999 + if (me==0) then + if (test_dump) call psb_csprt(80+me,bg,head='% Smoothed aggregate AC.') + endif + + deallocate(nzbr,idisp) + + case default + write(0,*) 'Inconsistent input in smooth_new_aggregate' + end select + + + case(smth_biz_) + + select case(p%iprcparm(coarse_mat_)) + + case(mat_distr_) + + call psb_spclone(b,bg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spclone') + goto 9999 + end if + call psb_dscdec(naggr,icontxt,p%desc_data,info) + + call spfree(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spfree') + goto 9999 + end if + + + case(mat_repl_) + ! + ! + nzbr(:) = 0 + nzbr(myprow+1) = b%infoa(nnz_) + + call psb_dscrep(ntaggr,icontxt,p%desc_data,info) + + + call igsum2d(icontxt,'All',' ',np,1,nzbr,np,-1,-1) + nzbg = sum(nzbr) + call psb_spall(ntaggr,ntaggr,bg,nzbg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spall') + goto 9999 + end if + + call blacs_get(icontxt,10,icomm ) + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(myprow+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,bg%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + call mpi_allgatherv(b%ia1,ndx,mpi_integer,bg%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + call mpi_allgatherv(b%ia2,ndx,mpi_integer,bg%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + if(info /= 0) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end if + + + bg%m = ntaggr + bg%k = ntaggr + bg%infoa(nnz_) = nzbg + bg%fida='COO' + bg%descra='G' + call psb_fixcoo(bg,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_fixcoo') + goto 9999 + end if + call psb_spfree(b,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_spfree') + goto 9999 + end if + + end select + deallocate(nzbr,idisp) + + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + + end subroutine smooth_aggregate + + + +end subroutine psb_dbldaggrmat diff --git a/src/prec/psb_dcslu.f90 b/src/prec/psb_dcslu.f90 new file mode 100644 index 00000000..db9d7271 --- /dev/null +++ b/src/prec/psb_dcslu.f90 @@ -0,0 +1,677 @@ +!***************************************************************************** +!* * +!* This is where the action takes place. As you may notice, the only * +!* piece that's really enabled is that for CSR. This is to be fixed. * +!* CSRSETUP does the setup: building the prec descriptor plus retrieving * +!* matrix rows if needed * +!* * +!* * +!* * +!* * +!* some open code does the renumbering * +!* * +!* DSPLU90 does the actual factorization. * +!* * +!* * +!* * +!***************************************************************************** +subroutine psb_dcslu(a,desc_a,p,upd,info) + use psb_serial_mod + use psb_prec_type + use psb_descriptor_type + use psb_spmat_type + use psb_tools_mod + use psb_psblas_mod + use psb_error_mod + implicit none + ! + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. array Arguments .. + type(psb_dspmat_type), intent(in), target :: a + type(psb_dbase_prec), intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + character, intent(in) :: upd + + ! .. Local Scalars .. + integer :: i, j, jj, k, kk, m + integer :: int_err(5) + character :: trans, unitd + type(psb_dspmat_type) :: blck, atmp + real(kind(1.d0)) :: t1,t2,t3,t4,t5,t6,mpi_wtime, t7, t8 + integer, pointer :: itmp(:), itmp2(:) + real(kind(1.d0)), pointer :: rtmp(:) + external mpi_wtime + logical, parameter :: debugprt=.false., debug=.false., aggr_dump=.false. + integer istpb, istpe, ifctb, ifcte, err_act, irank, icomm, nztota, nztotb,& + & nztmp, nzl, ione, nnr, ir, mglob, mtype, n_row, nrow_a,n_col, nhalo,lovr + integer ::icontxt,nprow,npcol,me,mycol + character(len=20) :: name, ch_err + + info=0 + name='psb_dcslu' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + m = a%m + if (m < 0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + trans = 'N' + unitd = 'U' + if (p%iprcparm(n_ovr_) < 0) then + info = 11 + int_err(1) = 1 + int_err(2) = p%iprcparm(n_ovr_) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + ! call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + + icontxt=desc_a%matrix_data(psb_ctxt_) + call psb_nullify_sp(blck) + t1= mpi_wtime() + + if(debug) write(0,*)me,': calling psb_csrsetup' + call psb_csrsetup(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info) + if(info/=0) then + info=4010 + ch_err='psb_csrsetup' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + t2= mpi_wtime() + if(debug) write(0,*)me,': out of psb_csrsetup' + + if (associated(p%av)) then + if (size(p%av) < bp_ilu_avsz) then + do k=1,size(p%av) + call psb_spfree(p%av(k),info) + end do + deallocate(p%av) + p%av => null() + endif + endif + + if (.not.associated(p%av)) then + allocate(p%av(bp_ilu_avsz)) + endif + do k=1,size(p%av) + call psb_nullify_sp(p%av(k)) + end do + nrow_a = desc_a%matrix_data(psb_n_row_) + call psb_spinfo(nztotreq,a,nztota,info) + if(info/=0) then + info=4010 + ch_err='psb_spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + n_col = desc_a%matrix_data(psb_n_col_) + nhalo = n_col-nrow_a + n_row = p%desc_data%matrix_data(psb_n_row_) + lovr = ((nztota+nrow_a-1)/nrow_a)*nhalo*p%iprcparm(n_ovr_) + p%av(l_pr_)%m = n_row + p%av(l_pr_)%k = n_row + p%av(u_pr_)%m = n_row + p%av(u_pr_)%k = n_row + call psb_spall(n_row,n_row,p%av(l_pr_),nztota+lovr,info) + call psb_spall(n_row,n_row,p%av(u_pr_),nztota+lovr,info) + if(info/=0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (associated(p%d)) then + if (size(p%d) < n_row) then + deallocate(p%d) + endif + endif + if (.not.associated(p%d)) then + allocate(p%d(n_row),stat=info) + endif + + + if (debug) then + write(0,*) me,'Done psb_csrsetup' + call blacs_barrier(icontxt,'All') + endif + +! if (me==0) write(0,*) 'setup time',t2-t1, blck%fida, p%iprcparm(p_type_),blck%m,upd +! write(0,'(i3,1x,a,4(1x,g14.5))') me,' setup time',t2-t1 + + if (p%iprcparm(iren_) > 0) then + + ! + ! Here we allocate a full copy to hold local A and received BLK + ! + + call psb_spinfo(nztotreq,a,nztota,info) + call psb_spinfo(nztotreq,blck,nztotb,info) + call psb_spall(atmp,nztota+nztotb,info) + if(info/=0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + +! write(0,*) 'DCSLU ',nztota,nztotb,a%m + + + call apply_renum(info) + if(info/=0) then + info=4010 + ch_err='apply_ernum' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + t3 = mpi_wtime() + if (debugprt) then + open(40+me) + call psb_csprt(40+me,atmp,head='% Local matrix') + close(40+me) + endif + if (debug) write(0,*) me,' Factoring rows ',& + &atmp%m,a%m,blck%m,atmp%ia2(atmp%m+1)-1 + + ! + ! Ok, factor the matrix. + ! + t5 = mpi_wtime() + blck%m=0 + call psb_dsplu(atmp,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_dsplu' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_spfree(atmp,info) + if(info/=0) then + info=4010 + ch_err='psb_spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + else if (p%iprcparm(iren_) == 0) then + t3 = mpi_wtime() +! ierr = MPE_Log_event( ifctb, 0, "st SIMPLE" ) + ! This is where we have mo renumbering, thus no need + ! for ATMP + + if (debugprt) then + open(40+me) + call psb_csprt(40+me,a,iv=p%desc_data%loc_to_glob,& + & head='% Local matrix') + if (p%iprcparm(p_type_)==asm_) then + call psb_csprt(40+me,blck,iv=p%desc_data%loc_to_glob,& + & irs=a%m,head='% Received rows') + endif + close(40+me) + endif + + + + t5= mpi_wtime() + if (debug) write(0,*) me,' Going for dsplu' + call psb_dsplu(a,p%av(l_pr_),p%av(u_pr_),p%d,info,blck=blck) + if(info/=0) then + info=4010 + ch_err='psb_dsplu' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) me,' Done dsplu' + endif + + + if (debugprt) then + ! + ! Print out the factors on file. + ! + open(80+me) + + call psb_csprt(80+me,p%av(l_pr_),head='% Local L factor') + write(80+me,*) '% Diagonal: ',p%av(l_pr_)%m + do i=1,p%av(l_pr_)%m + write(80+me,*) i,i,p%d(i) + enddo + call psb_csprt(80+me,p%av(u_pr_),head='% Local U factor') + + close(80+me) + endif + + +! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" ) + t6 = mpi_wtime() +! +! write(0,'(i3,1x,a,3(1x,g18.9))') me,'renum/factor time',t3-t2,t6-t5 +! if (me==0) write(0,'(a,3(1x,g18.9))') 'renum/factor time',t3-t2,t6-t5 + + call psb_spfree(blck,info) + if(info/=0) then + info=4010 + ch_err='psb_spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) me,'End of cslu' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine apply_renum(info) + + integer, intent(out) :: info + character(len=20) :: name, ch_err + + info=0 + name='apply_renum' + call psb_erractionsave(err_act) + +!!!!!!!!!!!!!!!! CHANGE FOR NON-CSR A + ! + ! Renumbering type: + ! 1. Global column indices + ! (2. GPS band reduction disabled for the time being) + + if (p%iprcparm(iren_)==1) then + atmp%m = a%m + blck%m + atmp%k = a%k + atmp%fida='CSR' + atmp%descra = 'GUN' + + ! This is the renumbering coherent with global indices.. + mglob = desc_a%matrix_data(m_) + ! + ! Remember: we have switched IA1=COLS and IA2=ROWS + ! Now identify the set of distinct local column indices + ! + + nnr = p%desc_data%matrix_data(psb_n_row_) + allocate(p%perm(nnr),p%invperm(nnr),itmp2(nnr)) + do k=1,nnr + itmp2(k) = p%desc_data%loc_to_glob(k) + enddo + ! + ! We want: NEW(I) = OLD(PERM(I)) + ! + call isrx(nnr,itmp2,p%perm) + + do k=1, nnr + p%invperm(p%perm(k)) = k + enddo + t3 = mpi_wtime() + + ! Build ATMP with new numbering. + + allocate(itmp(max(8,atmp%m+2,nztmp+2)),rtmp(atmp%m)) + + j = 1 + atmp%ia2(1) = 1 + do i=1, atmp%m + ir = p%perm(i) + + if (ir <= a%m ) then + + nzl = a%ia2(ir+1) - a%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + deallocate(itmp,itmp2,rtmp) + + else if (p%iprcparm(iren_)==2) then + + atmp%m = a%m + blck%m + atmp%k = a%k + atmp%fida='CSR' + atmp%descra = 'GUN' + do i=1, a%m + atmp%ia2(i) = a%ia2(i) + do j= a%ia2(i), a%ia2(i+1)-1 + atmp%ia1(j) = a%ia1(j) + enddo + enddo + atmp%ia2(a%m+1) = a%ia2(a%m+1) + + if (blck%m>0) then + do i=1, blck%m + atmp%ia2(a%m+i) = nztota+blck%ia2(i) + do j= blck%ia2(i), blck%ia2(i+1)-1 + atmp%ia1(nztota+j) = blck%ia1(j) + enddo + enddo + atmp%ia2(atmp%m+1) = nztota+blck%ia2(blck%m+1) + endif + nztmp = atmp%ia2(atmp%m+1) - 1 + + + ! This is a renumbering with Gibbs-Poole-Stockmeyer + ! band reduction. Switched off for now. To be fixed, + ! gps_reduction should get p%perm. + +! write(0,*) me,' Renumbering: realloc perms',atmp%m + call psb_realloc(atmp%m,p%perm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_realloc(atmp%m,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(itmp(max(8,atmp%m+2,nztmp+2))) + itmp(1:8) = 0 +! write(0,*) me,' Renumbering: Calling Metis' +! call blacs_barrier(icontxt,'All') + ione = 1 +! write(0,*) size(p%av(u_pr_)%pl),size(p%av(l_pr_)%pr) + call gps_reduction(atmp%m,atmp%ia2,atmp%ia1,p%perm,p%invperm,info) + if(info/=0) then + info=4010 + ch_err='gps_reduction' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + +! write(0,*) me,' Renumbering: Done GPS' + call blacs_barrier(icontxt,'All') + do i=1, atmp%m + if (p%perm(i) /= i) then + write(0,*) me,' permutation is not identity ' + exit + endif + enddo + + + + do k=1, nnr + p%invperm(p%perm(k)) = k + enddo + t3 = mpi_wtime() + + ! Build ATMP with new numbering. + + allocate(itmp2(max(8,atmp%m+2,nztmp+2)),rtmp(atmp%m)) + + j = 1 + atmp%ia2(1) = 1 + do i=1, atmp%m + ir = p%perm(i) + + if (ir <= a%m ) then + + nzl = a%ia2(ir+1) - a%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = a%ia2(ir) + k=0 + do kk=1, nzl + if (a%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = a%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(a%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else if (ir <= atmp%m ) then + + ir = ir - a%m + nzl = blck%ia2(ir+1) - blck%ia2(ir) + if (nzl > size(rtmp)) then + call psb_realloc(nzl,rtmp,info) + if(info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + jj = blck%ia2(ir) + k=0 + do kk=1, nzl + if (blck%ia1(jj+kk-1)<=atmp%m) then + k = k + 1 + rtmp(k) = blck%aspk(jj+kk-1) + atmp%ia1(j+k-1) = p%invperm(blck%ia1(jj+kk-1)) + endif + enddo + call isrx(k,atmp%ia1(j:j+k-1),itmp2) + do kk=1,k + atmp%aspk(j+kk-1) = rtmp(itmp2(kk)) + enddo + + else + write(0,*) 'Row index error 1 :',i,ir + endif + + j = j + k + atmp%ia2(i+1) = j + + enddo + + t4 = mpi_wtime() + + + + deallocate(itmp,itmp2,rtmp) + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine apply_renum + + + subroutine gps_reduction(m,ia,ja,perm,iperm,info) + integer i,j,dgConn,Npnt,m + integer n,idpth,ideg,ibw2,ipf2 + integer,dimension(:) :: perm,iperm,ia,ja + integer, intent(out) :: info + + integer,dimension(:,:),allocatable::NDstk + integer,dimension(:),allocatable::iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor + !--- Per la common area. + + common /gra/ n,iDpth,iDeg + + character(len=20) :: name, ch_err + + info=0 + name='gps_reduction' + call psb_erractionsave(err_act) + + + !--- Calcolo il massimo grado di connettivita'. + npnt = m + write(6,*) ' GPS su ',npnt + dgConn=0 + do i=1,m + dgconn = max(dgconn,(ia(i+1)-ia(i))) + enddo + !--- Il max valore di connettivita' e "dgConn" + + !--- Valori della common + n=Npnt !--- Numero di righe + iDeg=dgConn !--- Massima connettivita' +! iDpth= !--- Numero di livelli non serve settarlo + + allocate(NDstk(Npnt,dgConn),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction first alloc OK' + endif + allocate(iOld(Npnt),renum(Npnt+1),ndeg(Npnt),lvl(Npnt),lvls1(Npnt),& + &lvls2(Npnt),ccstor(Npnt),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + write(0,*) 'gps_reduction 2nd alloc OK' + endif + + !--- Prepariamo il grafo della matrice + Ndstk(:,:)=0 + do i=1,Npnt + k=0 + do j = ia(i),ia(i+1) - 1 + if ((1<=ja(j)).and.( ja( j ) /= i ).and.(ja(j)<=npnt)) then + k = k+1 + Ndstk(i,k)=ja(j) + endif + enddo + ndeg(i)=k + enddo + + !--- Numerazione. + do i=1,Npnt + iOld(i)=i + enddo + write(0,*) 'gps_red : Preparation done' + !--- + !--- Chiamiamo funzione reduce. + call reduce(Ndstk,Npnt,iOld,renum,ndeg,lvl,lvls1, lvls2,ccstor,ibw2,ipf2) + write(0,*) 'gps_red : Done reduce' + !--- Permutazione + perm(1:Npnt)=renum(1:Npnt) + !--- Inversa permutazione + do i=1,Npnt + iperm(perm(i))=i + enddo + !--- Puliamo tutto. + deallocate(NDstk,iOld,renum,ndeg,lvl,lvls1,lvls2,ccstor) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + + end subroutine gps_reduction + + +end subroutine psb_dcslu + + diff --git a/src/prec/psb_dcsrsetup.f90 b/src/prec/psb_dcsrsetup.f90 new file mode 100644 index 00000000..be15193a --- /dev/null +++ b/src/prec/psb_dcsrsetup.f90 @@ -0,0 +1,215 @@ +!***************************************************************************** +!* * +!* This routine does two things: * +!* 1. Builds the auxiliary descriptor. This is always done even for * +!* Block Jacobi. * +!* 2. Retrieves the remote matrix pieces. * +!* * +!* All of 1. is done under f90_dscov, which is independent of CSR, and * +!* has been placed in the TOOLS directory because it might be used for * +!* building a descriptor for an extended stencil in a PDE solver without * +!* necessarily applying AS precond. * +!* * +!* * +!* * +!* * +!* * +!***************************************************************************** +Subroutine psb_dcsrsetup(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) + + use psb_serial_mod + use psb_descriptor_type + Use psb_prec_type + use psb_tools_mod + use psb_const_mod + use psb_error_mod + Implicit None + + ! .. Array Arguments .. + integer, intent(in) :: ptype,novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_dspmat_type), Intent(inout) :: blk + integer, intent(out) :: info + Type(psb_desc_type), Intent(inout) :: desc_p + Type(psb_desc_type), Intent(in) :: desc_data + Character, Intent(in) :: upd + character(len=5), optional :: outfmt + + + real(kind(1.d0)) :: t1,t2,t3,mpi_wtime + external mpi_wtime + integer idscb,idsce,iovrb,iovre, err, irank, icomm + + ! .. Local Scalars .. + Integer :: k, tot_elem,proc,& + & point,nprow,npcol, me, mycol, start,m,nnzero,& + & icontxt, lovr, n_col, linp,ier,n,int_err(5),& + & tot_recv, ircode, n_row, nztot,nhalo, nrow_a,err_act + Logical,Parameter :: debug=.false., debugprt=.false. + character(len=20) :: name, ch_err + name='psb_dcsrsetup' + info=0 + call psb_erractionsave(err_act) + + If(debug) Write(0,*)'IN DCSRSETUP ', upd + icontxt=desc_data%matrix_data(psb_ctxt_) + tot_recv=0 + + nrow_a = desc_data%matrix_data(psb_n_row_) + nnzero = Size(a%aspk) + n_col = desc_data%matrix_data(psb_n_col_) + nhalo = n_col-nrow_a + + + If (ptype == bja_) Then + ! + ! Block Jacobi. Copy the descriptor, just in case we want to + ! do the renumbering. + ! + call psb_spall(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida = 'COO' + blk%infoa(nnz_) = 0 + + If (upd == 'F') Then + call psb_dsccpy(desc_p,desc_data,info) + if(info /= 0) then + info=4010 + ch_err='psb_ddsccpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + + Else If (ptype == asm_) Then + + + ! + ! Additive Schwarz variant. + ! + ! + + icontxt=desc_data%matrix_data(psb_ctxt_) + + if (novr < 0) then + info=3 + int_err(1)=novr + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (novr == 0) then + ! + ! This is really just Block Jacobi..... + ! + call psb_spall(0,0,blk,1,info) + if(info /= 0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blk%fida='COO' + blk%infoa(nnz_)=0 + if (debug) write(0,*) 'Calling desccpy' + if (upd == 'F') then + call psb_dsccpy(desc_p,desc_data,info) + if(info /= 0) then + info=4010 + ch_err='psb_dsccpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'Early return from dcsrsetup: P>=3 N_OVR=0' + endif + return + endif + + call blacs_get(icontxt,10,icomm ) +!!$ call MPI_Comm_rank(icomm,irank,ierr) +!!$ idscb = mpe_log_get_event_number() +!!$ idsce = mpe_log_get_event_number() +!!$ iovrb = mpe_log_get_event_number() +!!$ iovre = mpe_log_get_event_number() +!!$ if (irank==0) then +!!$ info = mpe_describe_state(idscb,idsce,"DSCASB ","NavyBlue") +!!$ info = mpe_describe_state(iovrb,iovre,"DSCOVR ","DeepPink") +!!$ endif +!!$ + + Call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + If(debug)Write(0,*)'BEGIN dcsrsetup',me,upd + t1 = mpi_wtime() + + + If (upd == 'F') Then + ! + ! Build the auiliary descriptor',desc_p%matrix_data(psb_n_row_) + ! + call psb_dscov(a,desc_data,novr,desc_p,info) + if(info /= 0) then + info=4010 + ch_err='psb_dscov' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + Endif + + if(debug) write(0,*) me,' From dscov _:',desc_p%matrix_data(psb_n_row_),desc_p%matrix_data(psb_n_col_) + + + n_row = desc_p%matrix_data(psb_n_row_) + t2 = mpi_wtime() + + if (debug) write(0,*) 'Before dcsrovr ',blk%fida,blk%m,nnz_,blk%infoa(nnz_) +!!$ ierr = MPE_Log_event( iovrb, 0, "st OVR" ) +!!$ blk%m = n_row-nrow_a +!!$ blk%k = n_row + + if (present(outfmt)) then + if(debug) write(0,*) me,': Calling CSROVR with ',size(blk%ia2) + Call psb_csrovr(a,desc_p,blk,info,outfmt=outfmt) + else + if(debug) write(0,*) me,': Calling CSROVR with ',size(blk%ia2) + Call psb_csrovr(a,desc_p,blk,info) + end if + + + if(info /= 0) then + info=4010 + ch_err='psb_csrovr' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(0,*) 'After psb_dcsrovr ',blk%fida,blk%m,nnz_,blk%infoa(nnz_) +!!$ ierr = MPE_Log_event( iovre, 0, "ed OVR" ) + + t3 = mpi_wtime() + if (debugprt) then + open(40+me) + call psb_csprt(40+me,blk,head='% Ovrlap rows') + close(40+me) + endif + + + End If + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +End Subroutine psb_dcsrsetup + diff --git a/src/prec/psb_dgenaggrmap.f90 b/src/prec/psb_dgenaggrmap.f90 new file mode 100644 index 00000000..fd1a8757 --- /dev/null +++ b/src/prec/psb_dgenaggrmap.f90 @@ -0,0 +1,258 @@ +subroutine psb_dgenaggrmap(aggr_type,a,desc_a,nlaggr,ilaggr,info) + use psb_spmat_type + use psb_serial_mod + use psb_desc_type + use psb_error_mod + implicit none + integer, intent(in) :: aggr_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: ilaggr(:),nlaggr(:) + integer, intent(out) :: info + ! Locals + integer, pointer :: ils(:), neigh(:) + integer :: icnt,nlp,k,n,ia,isz,nr, naggr,i,j,m + + logical :: recovery + logical, parameter :: debug=.false. + integer ::icontxt,nprow,npcol,me,mycol,err_act + integer :: nrow, ncol, n_ne + integer, parameter :: one=1, two=2 + character(len=20) :: name, ch_err + + info=0 + name = 'psb_bldaggrmat' + call psb_erractionsave(err_act) + ! + ! Note. At the time being we are ignoring aggr_type + ! so that we only have local decoupled aggregation. This might + ! change in the future. + ! + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + + nr = a%m + allocate(ilaggr(nr),neigh(nr),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do i=1, nr + ilaggr(i) = -(nr+1) + end do + ! Note: -(nr+1) Untouched as yet + ! -i 1<=i<=nr Adjacent to aggregate i + ! i 1<=i<=nr Belonging to aggregate i + + ! + ! Phase one: group nodes together. + ! Very simple minded strategy. + ! + naggr = 0 + nlp = 0 + do + icnt = 0 + do i=1, nr + if (ilaggr(i) == -(nr+1)) then + ! + ! 1. Untouched nodes are marked >0 together + ! with their neighbours + ! + icnt = icnt + 1 + naggr = naggr + 1 + ilaggr(i) = naggr + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + do k=1, n_ne + j = neigh(k) + if ((1<=j).and.(j<=nr)) then + ilaggr(j) = naggr +!!$ if (ilaggr(j) < 0) ilaggr(j) = naggr +!!$ if (ilaggr(j) == -(nr+1)) ilaggr(j) = naggr + endif + enddo + ! + ! 2. Untouched neighbours of these nodes are marked <0. + ! + call psb_neigh(a,i,neigh,n_ne,info,lev=two) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do n = 1, n_ne + m = neigh(n) + if ((1<=m).and.(m<=nr)) then + if (ilaggr(m) == -(nr+1)) ilaggr(m) = -naggr + endif + enddo + endif + enddo + nlp = nlp + 1 + if (icnt == 0) exit + enddo + if (debug) then + write(0,*) 'Check 1:',count(ilaggr == -(nr+1)),(a%ia1(i),i=a%ia2(1),a%ia2(2)-1) + end if + + ! + ! Phase two: sweep over leftovers. + ! + allocate(ils(naggr+10),stat=info) + if(info.ne.0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + do i=1, size(ils) + ils(i) = 0 + end do + do i=1, nr + n = ilaggr(i) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 1 ? ',n,naggr + else + ils(n) = ils(n) + 1 + end if + + end if + end do + if (debug) then + write(0,*) 'Phase 1: number of aggregates ',naggr + write(0,*) 'Phase 1: nodes aggregated ',sum(ils) + end if + + recovery=.false. + do i=1, nr + if (ilaggr(i) < 0) then + ! + ! Now some silly rule to break ties: + ! Group with smallest adjacent aggregate. + ! + isz = nr+1 + ia = -1 + + call psb_neigh(a,i,neigh,n_ne,info,lev=one) + if (info/=0) then + info=4010 + ch_err='psb_neigh' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + do j=1, n_ne + k = neigh(j) + if ((1<=k).and.(k<=nr)) then + n = ilaggr(k) + if (n>0) then + if (n>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 2? ',n,naggr + end if + + if (ils(n) < isz) then + ia = n + isz = ils(n) + endif + endif + endif + enddo + if (ia == -1) then + if (ilaggr(i) > -(nr+1)) then + ilaggr(i) = abs(ilaggr(i)) + if (ilaggr(I)>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 3? ',ilaggr(i),naggr + end if + ils(ilaggr(i)) = ils(ilaggr(i)) + 1 + ! + ! This might happen if the pattern is non symmetric. + ! Need a better handling. + ! + recovery = .true. + else + write(0,*) 'Unrecoverable error !!',ilaggr(i), nr + endif + else + ilaggr(i) = ia + if (ia>naggr) then + write(0,*) 'loc_Aggregate: n > naggr 4? ',ia,naggr + end if + + ils(ia) = ils(ia) + 1 + endif + end if + enddo + if (recovery) then + write(0,*) 'Had to recover from strange situation in loc_aggregate.' + write(0,*) 'Perhaps an unsymmetric pattern?' + endif + if (debug) then + write(0,*) 'Phase 2: number of aggregates ',naggr + write(0,*) 'Phase 2: nodes aggregated ',sum(ils) + do i=1, naggr + write(*,*) 'Size of aggregate ',i,' :',count(ilaggr==i), ils(i) + enddo + write(*,*) maxval(ils(1:naggr)) + write(*,*) 'Leftovers ',count(ilaggr<0), ' in ',nlp,' loops' + end if + +!!$ write(0,*) 'desc_a loc_aggr 4 : ', desc_a%matrix_data(m_) + if (count(ilaggr<0) >0) then + write(0,*) 'Fatal error: some leftovers!!!' + endif + + deallocate(ils,neigh,stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + if (nrow /= size(ilaggr)) then + write(0,*) 'SOmething wrong ilaggr ',nrow,size(ilaggr) + endif + call psb_realloc(ncol,ilaggr,info) + if (info/=0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocate(nlaggr(nprow),stat=info) + if (info/=0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + nlaggr(:) = 0 + nlaggr(me+1) = naggr + call igsum2d(icontxt,'All',' ',nprow,1,nlaggr,nprow,-1,-1) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dgenaggrmap diff --git a/src/prec/psb_dprec.f90 b/src/prec/psb_dprec.f90 new file mode 100644 index 00000000..abc51c9d --- /dev/null +++ b/src/prec/psb_dprec.f90 @@ -0,0 +1,910 @@ +subroutine psb_dprecaply(prec,x,y,desc_data,info,trans, work) + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_const_mod + use psb_error_mod + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + integer, intent(out) :: info + character(len=1), optional :: trans + real(kind(0.d0)),intent(inout), optional, target :: work(:) + + ! Local variables + character ::trans_ + real(kind(1.d0)), pointer :: work_(:) + integer :: icontxt,nprow,npcol,me,mycol,err_act + logical,parameter :: debug=.false., debugprt=.false. + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0 + external mpi_wtime + character(len=20) :: name, ch_err + name='psb_dprecaply' + info = 0 + call psb_erractionsave(err_act) + + icontxt=desc_data%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + if (present(work)) then + work_ => work + else + allocate(work_(4*desc_data%matrix_data(psb_n_col_))) + end if + + if (.not.(associated(prec%baseprecv))) then + write(0,*) 'Inconsistent preconditioner: neither SMTH nor BASE?' + end if + if (size(prec%baseprecv) >1) then + call psb_dmlprcaply(prec%baseprecv,x,zero,y,desc_data,trans_,work_,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_dmlprcaply') + goto 9999 + end if + + else if (size(prec%baseprecv) == 1) then + call psb_dbaseprcaply(prec%baseprecv(1),x,zero,y,desc_data,trans_, work_,info) + else + write(0,*) 'Inconsistent preconditioner: size of baseprecv???' + endif + + if (present(work)) then + else + deallocate(work_) + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dprecaply + + + +subroutine psb_dbaseprcaply(prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + K^-1 X + ! where K is a a basic preconditioner stored in prec + ! + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_const_mod + use psb_error_mod + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbase_prec), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: beta + character(len=1) :: trans + real(kind(0.d0)),intent(inout),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col + real(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:) + character ::diagl, diagu + integer :: icontxt,nprow,npcol,me,mycol,i, isz, nrg, err_act + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0 + external mpi_wtime + character(len=20) :: name, ch_err + + name='psb_dbaseprcaply' + info = 0 + call psb_erractionsave(err_act) + + icontxt=desc_data%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + info=40 + int_err(1)=6 + ch_err(2)=trans + goto 9999 + end select + + select case(prec%iprcparm(p_type_)) + + case(noprec_) + + n_row=desc_data%matrix_data(psb_n_row_) + if (beta==zero) then + y(1:n_row) = x(1:n_row) + else if (beta==one) then + y(1:n_row) = x(1:n_row) + y(1:n_row) + else if (beta==-one) then + y(1:n_row) = x(1:n_row) - y(1:n_row) + else + y(1:n_row) = x(1:n_row) + beta*y(1:n_row) + end if + + case(diagsc_) + + n_row=desc_data%matrix_data(psb_n_row_) + if (beta==zero) then + y(1:n_row) = x(1:n_row)*prec%d(1:n_row) + else if (beta==one) then + y(1:n_row) = x(1:n_row)*prec%d(1:n_row) + y(1:n_row) + else if (beta==-one) then + y(1:n_row) = x(1:n_row)*prec%d(1:n_row) - y(1:n_row) + else + y(1:n_row) = x(1:n_row)*prec%d(1:n_row) + beta*y(1:n_row) + end if + + case(bja_) + + call psb_bjacaply(prec,x,beta,y,desc_data,trans,work,info) + if(info.ne.0) then + info=4010 + ch_err=psb_bjacaply + goto 9999 + end if + + case(asm_,ras_,ash_,rash_) + + ! Note: currently trans is unused. + n_row=prec%desc_data%matrix_data(psb_n_row_) + n_col=prec%desc_data%matrix_data(psb_n_col_) + + isz=max(n_row,N_COL) + if ((6*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + aux => work(3*isz+1:) + else if ((4*isz) <= size(work)) then + aux => work(1:) + allocate(ww(isz),tx(isz),ty(isz)) + else if ((3*isz) <= size(work)) then + ww => work(1:isz) + tx => work(isz+1:2*isz) + ty => work(2*isz+1:3*isz) + allocate(aux(4*isz)) + else + allocate(ww(isz),tx(isz),ty(isz),& + &aux(4*isz)) + endif + + if (debug) write(0,*)' vdiag: ',prec%d(:) + if (debug) write(0,*) 'Bi-CGSTAB with Additive Schwarz prec' + + tx(1:desc_data%matrix_data(psb_n_row_)) = x(1:desc_data%matrix_data(psb_n_row_)) + tx(desc_data%matrix_data(psb_n_row_)+1:isz) = zero + + if (prec%iprcparm(restr_)==halo_) then + call psb_halo(tx,prec%desc_data,info,work=aux) + if(info /=0) then + info=4010 + ch_err='psb_halo' + goto 9999 + end if + else if (prec%iprcparm(restr_) /= none_) then + write(0,*) 'Problem in PRCAPLY: Unknown value for restriction ',& + &prec%iprcparm(restr_) + end if + + if (prec%iprcparm(iren_)>0) then + call psb_dgelp('N',n_row,1,prec%perm,tx,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_dgelp' + goto 9999 + end if + endif + + call psb_bjacaply(prec,tx,zero,ty,prec%desc_data,trans,aux,info) + + if (prec%iprcparm(iren_)>0) then + call psb_dgelp('N',n_row,1,prec%invperm,ty,isz,ww,isz,info) + if(info /=0) then + info=4010 + ch_err='psb_dgelp' + goto 9999 + end if + endif + + select case (prec%iprcparm(prol_)) + + case(none_) + ! Would work anyway, but since it's supposed to do nothing... + ! call f90_psovrl(ty,prec%desc_data,update_type=prec%a_restrict) + + case(sum_,avg_) + call psb_ovrl(ty,prec%desc_data,info,& + & update_type=prec%iprcparm(prol_),work=aux) + if(info /=0) then + info=4010 + ch_err='psb_ovrl' + goto 9999 + end if + + case default + write(0,*) 'Problem in PRCAPLY: Unknown value for prolongation ',& + & prec%iprcparm(prol_) + end select + + if (beta == zero) then + y(1:desc_data%matrix_data(psb_n_row_)) = ty(1:desc_data%matrix_data(psb_n_row_)) + else if (beta == one) then + y(1:desc_data%matrix_data(psb_n_row_)) = y(1:desc_data%matrix_data(psb_n_row_)) + & + & ty(1:desc_data%matrix_data(psb_n_row_)) + else if (beta == -one) then + y(1:desc_data%matrix_data(psb_n_row_)) = -y(1:desc_data%matrix_data(psb_n_row_)) + & + & ty(1:desc_data%matrix_data(psb_n_row_)) + else + y(1:desc_data%matrix_data(psb_n_row_)) = beta*y(1:desc_data%matrix_data(psb_n_row_)) + & + & ty(1:desc_data%matrix_data(psb_n_row_)) + end if + + + if ((6*isz) <= size(work)) then + else if ((4*isz) <= size(work)) then + deallocate(ww,tx,ty) + else if ((3*isz) <= size(work)) then + deallocate(aux) + else + deallocate(ww,aux,tx,ty) + endif + + case default + write(0,*) 'Invalid PRE%PREC ',prec%iprcparm(p_type_),':',& + & min_prec_,noprec_,diagsc_,bja_,& + & asm_,ras_,ash_,rash_ + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err=a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbaseprcaply + + + +subroutine psb_dbjacaply(prec,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + K^-1 X + ! where K is a a Block Jacobi preconditioner stored in prec + ! Note that desc_data may or may not be the same as prec%desc_data, + ! but since both are INTENT(IN) this should be legal. + ! + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_const_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_data + type(psb_dbase_prec), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:), y(:) + real(kind(0.d0)),intent(in) :: beta + character(len=1) :: trans + real(kind(0.d0)),intent(inout),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: n_row,n_col + real(kind(1.d0)), pointer :: ww(:), aux(:), tx(:),ty(:),tb(:) + character ::diagl, diagu + integer :: icontxt,nprow,npcol,me,mycol,i, isz, nrg, err_act + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical,parameter :: debug=.false., debugprt=.false. + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0 + external mpi_wtime + character(len=20) :: name, ch_err + name='psb_dbjacaply' + info = 0 + call psb_erractionsave(err_act) + + icontxt=desc_data%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + diagl='U' + diagu='U' + + select case(trans) + case('N','n') + case('T','t','C','c') + case default + call psb_errpush(40,name) + goto 9999 + end select + + + n_row=desc_data%matrix_data(psb_n_row_) + n_col=desc_data%matrix_data(psb_n_col_) + + if (n_col <= size(work)) then + ww => work(1:n_col) + if ((4*n_col+n_col) <= size(work)) then + aux => work(n_col+1:) + else + allocate(aux(4*n_col)) + endif + else + allocate(ww(n_col),aux(4*n_col)) + endif + + + if (prec%iprcparm(jac_sweeps_) == 1) then + + + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + + select case(trans) + case('N','n') + + call psb_spsm(one,prec%av(l_pr_),x,zero,ww,desc_data,info,& + & trans='N',unit=diagl,choice=none_,work=aux) + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(one,prec%av(u_pr_),ww,beta,y,desc_data,info,& + & trans='N',unit=diagu,choice=none_, work=aux) + if(info /=0) goto 9999 + + case('T','t','C','c') + call psb_spsm(one,prec%av(u_pr_),x,zero,ww,desc_data,info,& + & trans=trans,unit=diagu,choice=none_, work=aux) + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(one,prec%av(l_pr_),ww,beta,y,desc_data,info,& + & trans=trans,unit=diagl,choice=none_,work=aux) + if(info /=0) goto 9999 + + end select + + case(f_slu_) + + ww(1:n_row) = x(1:n_row) + + select case(trans) + case('N','n') + call fort_slu_solve(0,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + case('T','t','C','c') + call fort_slu_solve(1,n_row,1,ww,n_row,prec%iprcparm(slu_ptr_),info) + end select + + if(info /=0) goto 9999 + + if (beta == 0.d0) then + y(1:n_row) = ww(1:n_row) + else if (beta==1.d0) then + y(1:n_row) = ww(1:n_row) + y(1:n_row) + else if (beta==-1.d0) then + y(1:n_row) = ww(1:n_row) - y(1:n_row) + else + y(1:n_row) = ww(1:n_row) + beta*y(1:n_row) + endif + + case default + write(0,*) 'Unknown factorization type in bjac_prcaply',prec%iprcparm(f_type_) + end select + if (debug) write(0,*)' Y: ',y(:) + + else if (prec%iprcparm(jac_sweeps_) > 1) then + + ! Note: we have to add TRANS to this one !!!!!!!!! + + if (size(prec%av) < ap_nd_) then + info = 4011 + goto 9999 + endif + + allocate(tx(n_col),ty(n_col)) + tx = zero + ty = zero + select case(prec%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-one,prec%av(ap_nd_),tx,one,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + call psb_spsm(one,prec%av(l_pr_),ty,zero,ww,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=none_,work=aux) + ww(1:n_row) = ww(1:n_row)*prec%d(1:n_row) + call psb_spsm(one,prec%av(u_pr_),ww,zero,tx,& + & prec%desc_data,info,& + & trans='N',unit='U',choice=none_,work=aux) + if(info /=0) goto 9999 + end do + + case(f_slu_) + do i=1, prec%iprcparm(jac_sweeps_) + ! X(k+1) = M^-1*(b-N*X(k)) + ty(1:n_row) = x(1:n_row) + call psb_spmm(-one,prec%av(ap_nd_),tx,one,ty,& + & prec%desc_data,info,work=aux) + if(info /=0) goto 9999 + + call fort_slu_solve(0,n_row,1,ty,n_row,prec%iprcparm(slu_ptr_),info) + if(info /=0) goto 9999 + tx(1:n_row) = ty(1:n_row) + end do + end select + + if (beta == 0.d0) then + y(1:n_row) = tx(1:n_row) + else if (beta==1.d0) then + y(1:n_row) = tx(1:n_row) + y(1:n_row) + else if (beta==-1.d0) then + y(1:n_row) = tx(1:n_row) - y(1:n_row) + else + y(1:n_row) = tx(1:n_row) + beta*y(1:n_row) + endif + + deallocate(tx,ty) + + + else + + goto 9999 + + endif + + if (n_col <= size(work)) then + if ((4*n_col+n_col) <= size(work)) then + else + deallocate(aux) + endif + else + deallocate(ww,aux) + endif + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dbjacaply + + +subroutine psb_dmlprcaply(baseprecv,x,beta,y,desc_data,trans,work,info) + ! + ! Compute Y <- beta*Y + K^-1 X + ! where K is a multilevel (actually 2-level) preconditioner stored in prec + ! + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_blacs_mod + use psb_const_mod + use psb_error_mod + implicit none + + type(psb_desc_type),intent(in) :: desc_data + type(psb_dbase_prec), intent(in) :: baseprecv(:) + real(kind(0.d0)),intent(in) :: beta + real(kind(0.d0)),intent(inout) :: x(:), y(:) + character :: trans + real(kind(0.d0)),intent(inout),target :: work(:) + integer, intent(out) :: info + + + ! Local variables + integer :: n_row,n_col + real(kind(1.d0)), allocatable :: tx(:),ty(:),t2l(:),w2l(:),& + & x2l(:),b2l(:),tz(:),tty(:) + character ::diagl, diagu + integer :: icontxt,nprow,npcol,me,mycol,i, isz, nrg,nr2l,err_act, iptype + real(kind(1.d0)) :: omega + real(kind(1.d0)) :: t1, t2, t3, t4, t5, t6, t7, mpi_wtime + logical, parameter :: debug=.false., debugprt=.false. + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0 + integer :: ismth + external mpi_wtime + character(len=20) :: name, ch_err + name='psb_dmlprcaply' + info = 0 + call psb_erractionsave(err_act) + + + icontxt=desc_data%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + omega=baseprecv(2)%dprcparm(smooth_omega_) + ismth=baseprecv(2)%iprcparm(smth_kind_) + + select case(baseprecv(2)%iprcparm(ml_type_)) + case(no_ml_) + ! Should not really get here. + write(0,*) 'Smooth preconditioner with no multilevel in MLPRCAPLY????' + + case(add_ml_prec_) + + ! + ! Additive multilevel + ! + t1 = mpi_wtime() + n_row=desc_data%matrix_data(psb_n_row_) + n_col=desc_data%matrix_data(psb_n_col_) + call psb_baseprcaply(baseprecv(1),x,beta,y,desc_data,trans,work,info) + if(info /=0) goto 9999 + + nr2l = baseprecv(2)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(2)%desc_data%matrix_data(psb_n_row_) + allocate(t2l(nr2l),w2l(nr2l)) + t2l(:) = zero + w2l(:) = zero + + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + allocate(tx(max(n_row,n_col)),ty(max(n_row,n_col)),& + & tz(max(n_row,n_col))) + tx(1:desc_data%matrix_data(psb_n_row_)) = x(1:desc_data%matrix_data(psb_n_row_)) + tx(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + ty(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + tz(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + + + if (baseprecv(2)%iprcparm(glb_smth_) >0) then + call psb_halo(tx,desc_data,info,work=work) + if(info /=0) goto 9999 + else + tx(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + end if + + call psb_csmm(one,baseprecv(2)%av(sm_pr_t_),tx,zero,t2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,desc_data%matrix_data(psb_n_row_) + t2l(baseprecv(2)%mlia(i)) = t2l(baseprecv(2)%mlia(i)) + x(i) + end do + + end if + + if (baseprecv(2)%iprcparm(coarse_mat_)==mat_repl_) Then + call gsum2d(icontxt,'All',t2l(1:nrg)) + else if (baseprecv(2)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(2)%iprcparm(coarse_mat_) + endif + + w2l=t2l + call psb_baseprcaply(baseprecv(2),w2l,zero,t2l,baseprecv(2)%desc_data,'N',work,info) + + + if (ismth /= no_smth_) then + + call psb_csmm(one,baseprecv(2)%av(sm_pr_),t2l,zero,ty,info) + if(info /=0) goto 9999 + ! + ! Finally add back into Y. + ! + call psb_axpby(one,ty,one,y,desc_data,info) + if(info /=0) goto 9999 + deallocate(tx,ty,tz) + + else + + do i=1, desc_data%matrix_data(psb_n_row_) + y(i) = y(i) + t2l(baseprecv(2)%mlia(i)) + enddo + + end if + + if (debug) write(0,*)' Y2: ',Y(:) + + deallocate(t2l,w2l) + + case(mult_ml_prec_) + + ! + ! Multiplicative multilevel + ! Pre/post smoothing versions. + + select case(baseprecv(2)%iprcparm(smth_pos_)) + + case(post_smooth_) + + + t1 = mpi_wtime() + n_row = desc_data%matrix_data(psb_n_row_) + n_col = desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(2)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(2)%desc_data%matrix_data(psb_n_row_) + allocate(t2l(nr2l),w2l(nr2l),tx(n_col),ty(n_col)) + t2l(:) = zero + w2l(:) = zero + + ! + ! Need temp copies to handle Y<- betaY + K^-1 X + ! One of the temp copies is not strictly needed when beta==zero + ! + + if (debug) write(0,*)' mult_ml_apply omega ',omega + if (debug) write(0,*)' mult_ml_apply X: ',X(:) + call psb_axpby(one,x,zero,tx,desc_data,info) + if(info /=0) goto 9999 + + if (ismth /= no_smth_) then + ! + ! Smoothed aggregation + ! + allocate(tz(max(n_row,n_col))) + + if (baseprecv(2)%iprcparm(glb_smth_) >0) then + call psb_halo(tx,desc_data,info,work=work) + if(info /=0) goto 9999 + else + tx(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + end if + + call psb_csmm(one,baseprecv(2)%av(sm_pr_t_),tx,zero,t2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcut + ! + do i=1,desc_data%matrix_data(psb_n_row_) + t2l(baseprecv(2)%mlia(i)) = t2l(baseprecv(2)%mlia(i)) + tx(i) + end do + end if + + if (baseprecv(2)%iprcparm(coarse_mat_)==mat_repl_) Then + call gsum2d(icontxt,'All',t2l(1:nrg)) + else if (baseprecv(2)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(2)%iprcparm(coarse_mat_) + endif + + t6 = mpi_wtime() + w2l=t2l + call psb_baseprcaply(baseprecv(2),w2l,zero,t2l,baseprecv(2)%desc_data,'N',work,info) + if(info /=0) goto 9999 + + if (ismth /= no_smth_) then + if (ismth == smth_omg_) & + & call psb_halo(t2l,baseprecv(2)%desc_data,info,work=work) + call psb_csmm(one,baseprecv(2)%av(sm_pr_),t2l,zero,ty,info) + if(info /=0) goto 9999 + ! + ! Finally add back into Y. + ! + deallocate(tz) + else + ty(:) = zero + do i=1, desc_data%matrix_data(psb_n_row_) + ty(i) = ty(i) + t2l(baseprecv(2)%mlia(i)) + enddo + + end if + deallocate(t2l,w2l) + + call psb_spmm(-one,baseprecv(2)%aorig,ty,one,tx,desc_data,info,work=work) + if(info /=0) goto 9999 + + call psb_baseprcaply(baseprecv(1),tx,one,ty,desc_data,trans,work,info) + if(info /=0) goto 9999 + + call psb_axpby(one,ty,beta,y,desc_data,info) + if(info /=0) goto 9999 + + deallocate(tx,ty) + + + + case(pre_smooth_) + + t1 = mpi_wtime() + n_row=desc_data%matrix_data(psb_n_row_) + n_col=desc_data%matrix_data(psb_n_col_) + nr2l = baseprecv(2)%desc_data%matrix_data(psb_n_col_) + nrg = baseprecv(2)%desc_data%matrix_data(psb_n_row_) + allocate(t2l(nr2l),w2l(nr2l),tx(n_col),ty(n_col),tty(n_col)) + t2l(:) = zero + w2l(:) = zero + + ! + ! Need temp copies to handle Y<- betaY + K^-1 X + ! One of the temp copies is not strictly needed when beta==zero + ! + call psb_axpby(one,x,zero,tx,desc_data,info) + call psb_axpby(one,y,zero,ty,desc_data,info) + if(info /=0) goto 9999 + + call psb_baseprcaply(baseprecv(1),x,zero,tty,desc_data,trans,work,info) + if(info /=0) goto 9999 + + call psb_spmm(-one,baseprecv(2)%aorig,tty,one,tx,desc_data,info,work=work) + if(info /=0) goto 9999 + + if (ismth /= no_smth_) then + allocate(tz(max(n_row,n_col))) + + if (baseprecv(2)%iprcparm(glb_smth_) >0) then + call psb_halo(tx,desc_data,info,work=work) + if(info /=0) goto 9999 + else + tx(desc_data%matrix_data(psb_n_row_)+1:max(n_row,n_col)) = zero + end if + + call psb_csmm(one,baseprecv(2)%av(sm_pr_t_),tx,zero,t2l,info) + if(info /=0) goto 9999 + + else + ! + ! Raw aggregation, may take shortcuts + ! + do i=1,desc_data%matrix_data(psb_n_row_) + t2l(baseprecv(2)%mlia(i)) = t2l(baseprecv(2)%mlia(i)) + tx(i) + end do + end if + + if (baseprecv(2)%iprcparm(coarse_mat_)==mat_repl_) Then + call gsum2d(icontxt,'All',t2l(1:nrg)) + else if (baseprecv(2)%iprcparm(coarse_mat_) /= mat_distr_) Then + write(0,*) 'Unknown value for baseprecv(2)%iprcparm(coarse_mat_) ',& + & baseprecv(2)%iprcparm(coarse_mat_) + endif + + t6 = mpi_wtime() + w2l=t2l + call psb_baseprcaply(baseprecv(2),w2l,zero,t2l,baseprecv(2)%desc_data,'N',work,info) + if(info /=0) goto 9999 + + if (ismth /= no_smth_) then + + if (ismth == smth_omg_) & + & call psb_halo(t2l,baseprecv(2)%desc_data,info,work=work) + call psb_csmm(one,baseprecv(2)%av(sm_pr_),t2l,zero,ty,info) + if(info /=0) goto 9999 + + call psb_axpby(one,ty,one,tty,desc_data,info) + if(info /=0) goto 9999 + + deallocate(tz) + else + + do i=1, desc_data%matrix_data(psb_n_row_) + tty(i) = tty(i) + t2l(baseprecv(2)%mlia(i)) + enddo + + end if + + call psb_axpby(one,tty,beta,y,desc_data,info) + if(info /=0) goto 9999 + + deallocate(t2l,w2l,tx,ty,tty) + + + + + case default + + write(0,*) 'Unknown value for ml_smooth_pos',baseprecv(2)%iprcparm(smth_pos_) + + end select + + case default + write(0,*) me, 'Wrong mltype into PRCAPLY ',& + & baseprecv(2)%iprcparm(ml_type_) + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dmlprcaply + + +subroutine psb_dprec1(prec,x,desc_data,info,trans) + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_psblas_mod + use psb_const_mod + use psb_error_mod + implicit none + + type(pab_desc_type),intent(in) :: desc_data + type(psb_dprec_type), intent(in) :: prec + real(kind(0.d0)),intent(inout) :: x(:) + integer, intent(out) :: info + character(len=1), optional :: trans + logical,parameter :: debug=.false., debugprt=.false. + real(kind(1.d0)), parameter :: one=1.d0, zero=0.d0 + + + ! Local variables + character :: trans_ + integer :: icontxt,nprow,npcol,me,mycol,i, isz, err_act + real(kind(1.d0)), pointer :: WW(:), w1(:) + character(len=20) :: name, ch_err + name='psb_dprec1' + info = 0 + call psb_erractionsave(err_act) + + + icontxt=desc_data%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + if (present(trans)) then + trans_=trans + else + trans_='N' + end if + + allocate(ww(size(x)),w1(size(x))) + call psb_dprec(prec,x,ww,desc_data,info,trans_,w1) + if(info /=0) goto 9999 + x(:) = ww(:) + deallocate(ww,W1) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psb_dprec1 + diff --git a/src/prec/psb_dprecbld.f90 b/src/prec/psb_dprecbld.f90 new file mode 100644 index 00000000..581dbee1 --- /dev/null +++ b/src/prec/psb_dprecbld.f90 @@ -0,0 +1,539 @@ +subroutine psb_dprecbld(a,p,desc_a,info,upd) + + use psb_serial_mod + Use psb_spmat_type + use psb_descriptor_type + use psb_prec_type + use psb_const_mod + use psb_psblas_mod + Use psb_prec_mod + use psb_error_mod + Implicit None + + integer, intent(out) :: info + type(psb_dspmat_type), intent(in), target :: a + type(psb_dprec_type),intent(inout) :: p + type(psb_desc_type), intent(in) :: desc_a + character, intent(in), optional :: upd + + + ! Local scalars + Integer :: err, nnzero, n_row, n_col,I,j,icontxt,& + & me,mycol,nprow,npcol,mglob,lw, mtype, nrg, nzg, err_act + real(kind(1.d0)) :: temp, real_err(5) + real(kind(1.d0)),pointer :: gd(:), work(:) + integer :: int_err(5) + character :: iupd + + logical, parameter :: debug=.false. + integer,parameter :: iroot=0,iout=60,ilout=40 + character(len=20) :: name, ch_err + + info=0 + err=0 + call psb_erractionsave(err_act) + name = 'psb_precbld' + + if (debug) write(0,*) 'Entering precbld',P%prec,desc_a%matrix_data(:) + info = 0 + int_err(1) = 0 + icontxt = desc_a%matrix_data(CTXT_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(m_) + if (debug) write(0,*) 'Preconditioner Blacs_gridinfo' + call blacs_gridinfo(icontxt, nprow, npcol, me, mycol) + + if (present(upd)) then + if (debug) write(0,*) 'UPD ', upd + if ((UPD.eq.'F').or.(UPD.eq.'T')) then + IUPD=UPD + else + IUPD='F' + endif + else + IUPD='F' + endif + + if (.not.associated(p%baseprecv)) then + !! Error 1: should call precset + end if + ! + ! Should add check to ensure all procs have the same... + ! + ! ALso should define symbolic names for the preconditioners. + ! + + call psb_check_def(p%baseprecv(1)%iprcparm(p_type_),'base_prec',diagsc_,is_legal_base_prec) + allocate(p%baseprecv(1)%desc_data) + call psb_nullify_desc(p%baseprecv(1)%desc_data) + + select case(p%baseprecv(1)%iprcparm(p_type_)) + case (NOPREC_) + ! Do nothing. + + + case (diagsc_) + + if (debug) write(0,*) 'Precond: Diagonal scaling' + ! diagonal scaling + + call psb_realloc(n_col,p%baseprecv(1)%d,info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_realloc') + goto 9999 + end if + + call psb_csrws(p%baseprecv(1)%d,a,info,trans='N') + if(info /= 0) then + info=4010 + ch_err='psb_csrws' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (debug) write(ilout+me,*) 'VDIAG ',n_row + do i=1,n_row + if (p%baseprecv(1)%d(i).eq.0.0d0) then + p%baseprecv(1)%d(i)=1.d0 + else + p%baseprecv(1)%d(i) = 1.d0/p%baseprecv(1)%d(i) + endif + + if (debug) write(ilout+me,*) i,desc_a%loc_to_glob(i),& + & p%baseprecv(1)%d(i) + if (p%baseprecv(1)%d(i).lt.0.d0) then + write(0,*) me,'Negative RWS? ',i,p%baseprecv(1)%d(i) + endif + end do + if (a%pl(1) /= 0) then + allocate(work(n_row),stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + call psb_dgelp('n',n_row,1,a%Pl,p%baseprecv(1)%d,n_col,WORK,n_row,info) + if(info /= 0) then + info=4010 + ch_err='psb_dgelp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(work) + endif + + if (debug) then + allocate(gd(mglob)) + call psb_dgatherm(gd, p%baseprecv(1)%d, desc_a, info, iroot=iroot) + if(info /= 0) then + info=4010 + ch_err='psb_dgatherm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (me.eq.iroot) then + write(iout+nprow,*) 'VDIAG CHECK ',mglob + do i=1,mglob + write(iout+nprow,*) i,gd(i) + enddo + endif + deallocate(gd) + endif + if (debug) write(*,*) 'Preconditioner DIAG computed OK' + + + case (bja_,asm_) + + call psb_check_def(p%baseprecv(1)%iprcparm(n_ovr_),'overlap',& + & 0,is_legal_n_ovr) + call psb_check_def(p%baseprecv(1)%iprcparm(restr_),'restriction',& + & halo_,is_legal_restrict) + call psb_check_def(p%baseprecv(1)%iprcparm(prol_),'prolongator',& + & none_,is_legal_prolong) + + if ((p%baseprecv(1)%iprcparm(iren_)<0).or.(p%baseprecv(1)%iprcparm(iren_)>2)) then + write(0,*) 'Bad PREC%IRENUM value, defaulting to 0', & + & p%baseprecv(1)%iprcparm(iren_) + p%baseprecv(1)%iprcparm(iren_) = 0 + endif + + if (debug) write(0,*)me, ': Calling PSB_DCSLU' + + + select case(p%baseprecv(1)%iprcparm(f_type_)) + + case(f_ilu_n_,f_ilu_e_) + call psb_dcslu(a,desc_a,p%baseprecv(1),iupd,info) + if(debug) write(0,*)me,': out of psb_dcslu' + if(info /= 0) then + info=4010 + ch_err='psb_dcslu' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_slu_) + p%baseprecv(1)%av => null() + if(debug) write(0,*)me,': calling splu_bld' + call psb_splu_bld(a,desc_a,p%baseprecv(1),info) + if(info /= 0) then + info=4010 + ch_err='splu_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case(f_none_) + write(0,*) 'Fact=None in PRECBLD Bja/ASM??' + + case default + write(0,*) 'Unknown factor type in precbld bja/asm: ',& + &p%baseprecv(1)%iprcparm(f_type_) + end select + + end select + + + if (size(p%baseprecv) >1) then + + if (.not.associated(p%baseprecv(2)%iprcparm)) then + info = 2222 + call psb_errpush(info,name) + goto 9999 + endif + call psb_check_def(p%baseprecv(2)%iprcparm(aggr_alg_),'aggregation',& + & loc_aggr_,is_legal_ml_aggr_kind) + call psb_check_def(p%baseprecv(2)%iprcparm(smth_kind_),'Smoother kind',& + & smth_omg_,is_legal_ml_smth_kind) + call psb_check_def(p%baseprecv(2)%iprcparm(coarse_mat_),'Coarse matrix',& + & mat_distr_,is_legal_ml_coarse_mat) + call psb_check_def(p%baseprecv(2)%iprcparm(smth_pos_),'smooth_pos',& + & pre_smooth_,is_legal_ml_smooth_pos) + call psb_check_def(p%baseprecv(2)%iprcparm(f_type_),'fact',f_ilu_n_,is_legal_ml_fact) + + allocate(p%baseprecv(2)%desc_data) + call psb_nullify_desc(p%baseprecv(2)%desc_data) + + select case(p%baseprecv(2)%iprcparm(f_type_)) + case(f_ilu_n_) + call psb_check_def(p%baseprecv(2)%iprcparm(ilu_fill_in_),'Level',0,is_legal_ml_lev) + case(f_ilu_e_) + call psb_check_def(p%baseprecv(2)%dprcparm(fact_eps_),'Eps',0.0d0,is_legal_ml_eps) + end select + call psb_check_def(p%baseprecv(2)%dprcparm(smooth_omega_),'omega',0.0d0,is_legal_omega) + call psb_check_def(p%baseprecv(2)%iprcparm(jac_sweeps_),'Jacobi sweeps',& + & 1,is_legal_jac_sweeps) + + call psb_mlprec_bld(a,desc_a,p%baseprecv(2),info) + if(info /= 0) then + info=4010 + ch_err='psb_mlprec_bld' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dprecbld + + +subroutine psb_splu_bld(a,desc_a,p,info) + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dbase_prec), intent(inout) :: p + integer, intent(out) :: info + + + type(psb_dspmat_type) :: blck, atmp + character(len=5) :: fmt + character :: upd='F' + integer :: i,j,nza,nzb,nzt,icontxt, me,mycol,nprow,npcol,err_act + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + name='psb_splu_bld' + call psb_erractionsave(err_act) + + icontxt = desc_A%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt, nprow, npcol, me, mycol) + + + fmt = 'COO' + call psb_nullify_sp(blck) + call psb_nullify_sp(atmp) + atmp%fida='COO' + if (Debug) then + write(0,*) me, 'SPLUBLD: Calling csdp' + call blacs_barrier(icontxt,'All') + endif + + call psb_dcsdp(a,atmp,info) + if(info /= 0) then + info=4010 + ch_err='psb_dcsdp' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nza = atmp%infoa(nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done csdp',info,nza,atmp%m,atmp%k + call blacs_barrier(icontxt,'All') + endif + call psb_csrsetup(p%iprcparm(p_type_),p%iprcparm(n_ovr_),a,& + & blck,desc_a,upd,p%desc_data,info,outfmt=fmt) + if(info /= 0) then + info=4010 + ch_err='psb_csrsetup' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nzb = blck%infoa(nnz_) + if (Debug) then + write(0,*) me, 'SPLUBLD: Done csrsetup',info,nzb,blck%fida + call blacs_barrier(icontxt,'All') + endif + if (nzb > 0 ) then + if (size(atmp%aspk) a + allocate(p%av(smth_avsz)) + + do i=1, smth_avsz + call psb_nullify_sp(p%av(i)) + call psb_spall(0,0,p%av(i),1,info) + if(info /= 0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end do + + ! Currently this is ignored by gen_aggrmap, but it could be + ! changed in the future. Need to package nlaggr & mlia in a + ! private data structure? + + call psb_gen_aggrmap(p%iprcparm(aggr_alg_),a,desc_a,p%nlaggr,p%mlia,info) + if(info /= 0) then + info=4010 + ch_err='psb_gen_aggrmap' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_bld_aggrmat(a,desc_a,p,info) + if(info /= 0) then + info=4010 + ch_err='psb_bld_aggrmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + nrg = p%av(ac_)%m + call psb_spinfo(nztotreq,p%av(ac_),nzg,info) + call psb_ipcoo2csr(p%av(ac_),info) + if(info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + allocate(p%d(nrg)) + + select case(p%iprcparm(f_type_)) + case(f_ilu_n_,f_ilu_e_) + call psb_spreall(p%av(l_pr_),nzg,info) + call psb_spreall(p%av(u_pr_),nzg,info) + call psb_dsplu(p%av(ac_),p%av(l_pr_),p%av(u_pr_),p%d,info) + if(info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + case(f_slu_) + call psb_spall(0,0,p%av(l_pr_),1,info) + call psb_spall(0,0,p%av(u_pr_),1,info) + call psb_ipcsr2coo(p%av(ac_),info) + if(info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + k=0 + do i=1,p%av(ac_)%infoa(nnz_) + if (p%av(ac_)%ia2(i) <= p%av(ac_)%m) then + k = k + 1 + p%av(ac_)%aspk(k) = p%av(ac_)%aspk(i) + p%av(ac_)%ia1(k) = p%av(ac_)%ia1(i) + p%av(ac_)%ia2(k) = p%av(ac_)%ia2(i) + end if + end do + p%av(ac_)%infoa(nnz_) = k + call psb_ipcoo2csr(p%av(ac_),info) + call psb_spinfo(nztotreq,p%av(ac_),nzg,info) + call fort_slu_factor(nrg,nzg,& + & p%av(ac_)%aspk,p%av(ac_)%ia2,p%av(ac_)%ia1,p%iprcparm(slu_ptr_),info) + if(info /= 0) then + info=4011 + call psb_errpush(info,name) + goto 9999 + end if + + case default + write(0,*) 'Invalid fact type for multi level',(p%iprcparm(f_type_)) + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + Return + +end subroutine psb_mlprec_bld diff --git a/src/prec/psb_dprecfree.f90 b/src/prec/psb_dprecfree.f90 new file mode 100644 index 00000000..9260b95f --- /dev/null +++ b/src/prec/psb_dprecfree.f90 @@ -0,0 +1,69 @@ +subroutine psb_dprecfree(p,info) + !...free sparse matrix structure... + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_prec_type + use psb_tools_mod + use psb_error_mod + implicit none + !....parameters... + + type(psb_dprec_type), intent(inout) :: p + integer, intent(out) :: info + + !...locals.... + integer :: int_err(5) + integer :: temp(1), me + real(kind(1.d0)) :: real_err(5) + integer :: icontxt,err_act,i + integer,parameter :: ione=1 + character(len=20) :: name, ch_err + + info=0 + name = 'psdprecfree' + call psb_erractionsave(err_act) + + me=-1 + +!!$ if (associated(p%baseprec)) then +!!$ call base_precfree(p%baseprec,info) +!!$ if (info /= 0) then +!!$ info=4010 +!!$ ch_err='base_precfree' +!!$ call psb_errpush(info,name,a_err=ch_err) +!!$ goto 9999 +!!$ end if +!!$ deallocate(p%baseprec,stat=info) +!!$ nullify(p%baseprec) +!!$ endif +!!$ +!!$ if (associated(p%mlprec)) then +!!$ ! Check this !!!!! +!!$ call base_precfree(p%mlprec,info) +!!$ if (info /= 0) then +!!$ write(0,*) 'From Base_precfree',info +!!$ end if +!!$ deallocate(p%mlprec,stat=info) +!!$ nullify(p%mlprec) +!!$ endif + + if (associated(p%baseprecv)) then + do i=1,size(p%baseprecv) + call psb_baseprecfree(p%baseprecv(i),info) + end do + deallocate(p%baseprecv) + p%baseprecv => null() + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dprecfree diff --git a/src/prec/psb_dprecset.f90 b/src/prec/psb_dprecset.f90 new file mode 100644 index 00000000..fa934ef4 --- /dev/null +++ b/src/prec/psb_dprecset.f90 @@ -0,0 +1,160 @@ +subroutine psb_dprecset(p,ptype,iv,rs,rv,info) + + use psb_serial_mod + use psb_descriptor_type + use psb_prec_type + implicit none + + type(psb_dprec_type), intent(inout) :: p + character(len=*), intent(in) :: ptype + integer, optional, intent(in) :: iv(:) + real(kind(1.d0)), optional, intent(in) :: rs + real(kind(1.d0)), optional, intent(in) :: rv(:) + integer, optional, intent(out) :: info + + type(psb_dbase_prec), pointer :: bpv(:)=>null() + character(len=len(ptype)) :: typeup + integer :: isz, err + + if (present(info)) info = 0 + + if (.not.associated(p%baseprecv)) then + allocate(p%baseprecv(1),stat=err) +!!$ if (associated(p%baseprec%iprcparm)) then +!!$ write(0,*) 'precset: should not get here!' +!!$ endif + call psb_nullify_baseprec(p%baseprecv(1)) + endif + + if (.not.associated(p%baseprecv(1)%iprcparm)) then + allocate(p%baseprecv(1)%iprcparm(ifpsz),stat=err) + if (err/=0) then + write(0,*)'Precset Memory Failure',err + endif + end if + + call touppers(ptype,typeup) + select case (typeup) +!!$ select case(toupper(ptype)) + case ('NONE','NOPREC') + p%baseprecv(1)%iprcparm(p_type_) = noprec_ + p%baseprecv(1)%iprcparm(f_type_) = f_none_ + p%baseprecv(1)%iprcparm(restr_) = none_ + p%baseprecv(1)%iprcparm(prol_) = none_ + p%baseprecv(1)%iprcparm(iren_) = 0 + p%baseprecv(1)%iprcparm(n_ovr_) = 0 + p%baseprecv(1)%iprcparm(jac_sweeps_) = 1 + + case ('DIAG','DIAGSC') + p%baseprecv(1)%iprcparm(p_type_) = diagsc_ + p%baseprecv(1)%iprcparm(f_type_) = f_none_ + p%baseprecv(1)%iprcparm(restr_) = none_ + p%baseprecv(1)%iprcparm(prol_) = none_ + p%baseprecv(1)%iprcparm(iren_) = 0 + p%baseprecv(1)%iprcparm(n_ovr_) = 0 + p%baseprecv(1)%iprcparm(jac_sweeps_) = 1 + + case ('BJA','ILU') + p%baseprecv(1)%iprcparm(p_type_) = bja_ + p%baseprecv(1)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(1)%iprcparm(restr_) = none_ + p%baseprecv(1)%iprcparm(prol_) = none_ + p%baseprecv(1)%iprcparm(iren_) = 0 + p%baseprecv(1)%iprcparm(n_ovr_) = 0 + p%baseprecv(1)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(1)%iprcparm(jac_sweeps_) = 1 + + case ('ASM','AS') + ! Defaults first + p%baseprecv(1)%iprcparm(p_type_) = asm_ + p%baseprecv(1)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(1)%iprcparm(restr_) = halo_ + p%baseprecv(1)%iprcparm(prol_) = none_ + p%baseprecv(1)%iprcparm(iren_) = 0 + p%baseprecv(1)%iprcparm(n_ovr_) = 1 + p%baseprecv(1)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(1)%iprcparm(jac_sweeps_) = 1 + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(1)%iprcparm(n_ovr_) = iv(1) + if (isz >= 2) p%baseprecv(1)%iprcparm(restr_) = iv(2) + if (isz >= 3) p%baseprecv(1)%iprcparm(prol_) = iv(3) + if (isz >= 4) p%baseprecv(1)%iprcparm(f_type_) = iv(4) + ! Do not consider renum for the time being. +!!$ if (isz >= 5) p%baseprecv(1)%iprcparm(iren_) = iv(5) + end if + + + case ('ML', '2LEV') + + select case (size(p%baseprecv)) + case(1) + ! Reallocate + allocate(bpv(2)) + bpv(1) = p%baseprecv(1) + call psb_nullify_baseprec(bpv(2)) + deallocate(p%baseprecv) + p%baseprecv => bpv + nullify(bpv) + + case(2) + ! Do nothing + + case default + ! Error + + end select + + allocate(p%baseprecv(2)%iprcparm(ifpsz),stat=err) + if (err/=0) then + write(0,*)'Precset Memory Failure 2l:2',err + endif + allocate(p%baseprecv(2)%dprcparm(dfpsz),stat=err) + if (err/=0) then + write(0,*)'Precset Memory Failure 2l:3',err + endif + + + + p%baseprecv(2)%iprcparm(p_type_) = bja_ + p%baseprecv(2)%iprcparm(ml_type_) = mult_ml_prec_ + p%baseprecv(2)%iprcparm(aggr_alg_) = loc_aggr_ + p%baseprecv(2)%iprcparm(smth_kind_) = smth_omg_ + p%baseprecv(2)%iprcparm(coarse_mat_) = mat_distr_ + p%baseprecv(2)%iprcparm(smth_pos_) = post_smooth_ + p%baseprecv(2)%iprcparm(glb_smth_) = 1 + p%baseprecv(2)%iprcparm(om_choice_) = lib_choice_ + p%baseprecv(2)%iprcparm(f_type_) = f_ilu_n_ + p%baseprecv(2)%iprcparm(ilu_fill_in_) = 0 + p%baseprecv(2)%dprcparm(smooth_omega_) = 4.d0/3.d0 + p%baseprecv(2)%iprcparm(jac_sweeps_) = 1 + + + if (present(iv)) then + isz = size(iv) + if (isz >= 1) p%baseprecv(2)%iprcparm(ml_type_) = iv(1) + if (isz >= 2) p%baseprecv(2)%iprcparm(aggr_alg_) = iv(2) + if (isz >= 3) p%baseprecv(2)%iprcparm(smth_kind_) = iv(3) + if (isz >= 4) p%baseprecv(2)%iprcparm(coarse_mat_) = iv(4) + if (isz >= 5) p%baseprecv(2)%iprcparm(smth_pos_) = iv(5) + if (isz >= 6) p%baseprecv(2)%iprcparm(glb_smth_) = iv(6) + if (isz >= 7) p%baseprecv(2)%iprcparm(f_type_) = iv(7) + if (isz >= 8) p%baseprecv(2)%iprcparm(jac_sweeps_) = iv(8) + + end if + + if (present(rs)) then + p%baseprecv(2)%iprcparm(om_choice_) = user_choice_ + p%baseprecv(2)%dprcparm(smooth_omega_) = rs + end if + + + case default + write(0,*) 'Unknown preconditioner type request "',ptype,'"' + err = 2 + + end select + + if (present(info)) info = err + +end subroutine psb_dprecset diff --git a/src/prec/psb_dsplu.f90 b/src/prec/psb_dsplu.f90 new file mode 100644 index 00000000..561d7ca7 --- /dev/null +++ b/src/prec/psb_dsplu.f90 @@ -0,0 +1,432 @@ +subroutine psb_dsplu(a,l,u,d,info,blck) + + ! + ! This routine copies and factors "on the fly" from A and BLCK + ! into L/D/U. + ! + ! + + use psb_serial_mod + use psb_tools_mod + use psb_error_mod + implicit none + ! .. Scalar Arguments .. + integer, intent(out) :: info + ! .. Array Arguments .. + type(psb_dspmat_type),intent(in) :: a + type(psb_dspmat_type),intent(inout) :: l,u + type(psb_dspmat_type),intent(in), optional, target :: blck + real(kind(1.d0)), intent(inout) :: d(:) + ! .. Local Scalars .. + real(kind(1.d0)) :: dia, temp + integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act + real(kind(1.d0)), parameter :: epstol=1.d-12 + + type(d_spmat), pointer :: blck_ + character(len=20) :: name, ch_err + name='psb_dcsrlu' + info = 0 + call psb_erractionsave(err_act) + ! .. Executable Statements .. + ! + + if (present(blck)) then + blck_ => blck + else + allocate(blck_) + call psb_nullify_sp(blck_) ! Why do we need this? Who knows.... + call psb_spall(0,0,blck_,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + blck_%m=0 + endif + + call psb_dspluint(m,a%m,a,blck_%m,blck_,& + & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info) + if(info.ne.0) then + info=4010 + ch_err='psb_dspluint' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l%infoa(1) = l1 + l%fida = 'CSR' + l%descra = 'TLU' + u%infoa(1) = l2 + u%fida = 'CSR' + u%descra = 'TUU' + l%m = m + l%k = m + u%m = m + u%k = m + if (present(blck)) then + blck_ => null() + else + call psb_spfree(blck_,info) + if(info.ne.0) then + info=4010 + ch_err='psb_spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + deallocate(blck_) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + subroutine psb_dspluint(m,ma,a,mb,b,& + & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) + implicit none + + type(psb_dspmat_type) :: a,b + integer :: m,ma,mb,l1,l2,info + integer, dimension(*) :: lia1,lia2,uia1,uia2 + real(kind(1.d0)), dimension(*) :: laspk,uaspk,d + + integer :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act + real(kind(1.d0)) :: dia,temp + real(kind(1.d0)), parameter :: epstol=1.d-12 + integer, parameter :: nrb=16 + logical,parameter :: debug=.false. + type(d_spmat) :: trw + character(len=20) :: name, ch_err + + name='psb_dspluint' + info=0 + call psb_erractionsave(err_act) + + trw%m=0 + trw%k=0 + if(debug) write(0,*)'LUINT Allocating TRW' + call psb_spall(trw,1,info) + if(info.ne.0) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'LUINT Done Allocating TRW' + lia2(1) = 1 + uia2(1) = 1 + l1=0 + l2=0 + m = ma+mb + if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb + + do i = 1, ma + if(debug) write(0,*)'LUINT: Loop index ',i,ma + d(i) = 0.d0 + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtrow, slower but able (in principle) to handle + ! anything. + ! + if (a%fida=='CSR') then + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = a%aspk(j) + lia1(l1) = k + else if (k == i) then + d(i) = a%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = a%aspk(j) + uia1(l2) = k + end if + enddo + + else + + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(ma-i+1,nrb) + call psb_spgtrow(i,a,trw,info,lrw=i+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_spgtrow' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + end if + +!!$ + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloop: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloop + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + ! write(0,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj) + dia = dia - temp*uaspk(jj) + ! write(0,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj) + cycle updateloop + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloop + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloop + enddo + ! + ! + ! Non singularity + ! + if (dabs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + call psb_errpush(info,name) + goto 9999 + else + dia = 1.d0/dia + end if + d(i) = dia + ! write(6,*)'diag(',i,')=',d(i) + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + do i = ma+1, m + d(i) = 0.d0 + + + if (b%fida=='CSR') then + + do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 + k = b%ia1(j) + ! if (me.eq.2) write(0,*)'ecco k=',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = b%aspk(j) + lia1(l1) = k + ! if(me.eq.2) write(0,*)'scrivo l' + else if (k == i) then + d(i) = b%aspk(j) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = b%aspk(j) + ! write(0,*)'KKKKK',k + uia1(l2) = k + end if + enddo + + else + + if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_spgtrow(i-ma,b,trw,info,lrw=i-ma+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_spgtrow' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + ! write(0,*)'KKKKK',k + if ((k < i).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == i) then + d(i) = trw%aspk(ktrw) + else if ((k > i).and.(k <= m)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + endif + + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + dia = d(i) + do kk = lia2(i), lia2(i+1) - 1 + ! + ! compute element alo(i,k) of incomplete factorization + ! + temp = laspk(kk) + k = lia1(kk) + laspk(kk) = temp*d(k) + ! update the rest of row i using alo(i,k) + low1 = kk + 1 + low2 = uia2(i) + updateloopb: do jj = uia2(k), uia2(k+1) - 1 + j = uia1(jj) + ! + if (j < i) then + ! search alo(i,*) for matching index J + do ll = low1, lia2(i+1) - 1 + l = lia1(ll) + if (l > j) then + low1 = ll + exit + else if (l == j) then + laspk(ll) = laspk(ll) - temp*uaspk(jj) + low1 = ll + 1 + cycle updateloopb + end if + enddo + ! + else if (j == i) then + ! j=i update diagonal + dia = dia - temp*uaspk(jj) + cycle updateloopb + ! + else if (j > i) then + ! search aup(i,*) for matching index j + do ll = low2, uia2(i+1) - 1 + l = uia1(ll) + if (l > j) then + low2 = ll + exit + else if (l == j) then + uaspk(ll) = uaspk(ll) - temp*uaspk(jj) + low2 = ll + 1 + cycle updateloopb + end if + enddo + end if + ! + ! for milu al=1.; for ilu al=0. + ! al = 1.d0 + ! dia = dia - al*temp*aup(jj) + enddo updateloopb + enddo + ! + ! + ! Non singularity + ! + if (dabs(dia) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + call psb_errpush(info,name) + goto 9999 + else + dia = 1.d0/dia + end if + d(i) = dia + ! Scale row i of upper triangle + do kk = uia2(i), uia2(i+1) - 1 + uaspk(kk) = uaspk(kk)*dia + enddo + enddo + + call psb_spfree(trw,info) + if(info.ne.0) then + info=4010 + ch_err='psb_spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if(debug) write(0,*)'Leaving dcsrlu' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psb_dspluint +end subroutine psb_dsplu diff --git a/src/psblas/Makefile b/src/psblas/Makefile new file mode 100644 index 00000000..57781188 --- /dev/null +++ b/src/psblas/Makefile @@ -0,0 +1,26 @@ +include ../../Make.inc + +#FCOPT=-O2 +F90_PSDOBJS= psb_ddot.o psb_damax.o psb_dasum.o psb_daxpby.o\ + psb_dnrm2.o psb_dnrmi.o psb_dspmm.o psb_dspsm.o\ + +LIBDIR=../../lib +HERE=. +LIBNAME=$(LIBDIR)/$(F90LIB) + + +INCDIRS=-I. -I.. -I$(LIBDIR) + + +lib: $(F90_PSDOBJS) + (cd INTERNALS; make lib LIBDIR=../$(LIBDIR) LIBNAME=$(LIBNAME)) + ar -cur $(LIBNAME) $(F90_PSDOBJS) + ranlib $(LIBNAME) + +#$(F90_PSDOBJS): $(MODS) + +veryclean: clean + /bin/rm -f $(LIBNAME) + +clean: + /bin/rm -f $(F90_PSDOBJS) $(LOCAL_MODS) diff --git a/src/psblas/notes b/src/psblas/notes new file mode 100644 index 00000000..0d5db57e --- /dev/null +++ b/src/psblas/notes @@ -0,0 +1,16 @@ +1 + nella f90_psdspmm si se work non è presente si alloca work1 + con dimensione pari a: + + llwork= 2*desc_a%matrix_data(psb_n_col_) + if (a%pr(1) /= 0) llwork = llwork + in * ik + if (a%pl(1) /= 0) llwork = llwork + im * ik + + però, poi, in psdspmm_.c si controlla semplicemente che la + dimensione di work si maggiore di desc_a%matrix_data(psb_n_row_). + perchè allocare un'area così grande? + +2 + nella psdspmm_.c nella parte in cui la matrice è trasposta si + usa il puntatore xcopy e si passa desc_as%matrix_data(psb_n_row_) + anzichè lldx: perchè? diff --git a/src/psblas/pdtreecomb.f b/src/psblas/pdtreecomb.f new file mode 100644 index 00000000..12e7e879 --- /dev/null +++ b/src/psblas/pdtreecomb.f @@ -0,0 +1,360 @@ +C +C This file imported from ScaLAPACK. +C +C + SUBROUTINE PDTREECOMB( ICTXT, SCOPE, N, MINE, RDEST0, CDEST0, + $ SUBPTR ) +* +* -- ScaLAPACK tools routine (version 1.0) -- +* University of Tennessee, Knoxville, Oak Ridge National Laboratory, +* and University of California, Berkeley. +* February 28, 1995 +* +* .. Scalar Arguments .. + CHARACTER SCOPE + INTEGER CDEST0, ICTXT, N, RDEST0 +* .. +* .. Array Arguments .. + DOUBLE PRECISION MINE( * ) +* .. +* .. Subroutine Arguments .. + EXTERNAL SUBPTR +* .. +* +* Purpose +* ======= +* +* PDTREECOMB does a 1-tree parallel combine operation on scalars, +* using the subroutine indicated by SUBPTR to perform the required +* computation. +* +* Arguments +* ========= +* +* ICTXT (global input) INTEGER +* The BLACS context handle, indicating the global context of +* the operation. The context itself is global. +* +* SCOPE (global input) CHARACTER +* The scope of the operation: 'Rowwise', 'Columnwise', or +* 'All'. +* +* N (global input) INTEGER +* The number of elements in MINE. N = 1 for the norm-2 +* computation and 2 for the sum of square. +* +* MINE (local input/global output) DOUBLE PRECISION array of +* dimension at least equal to N. The local data to use in the +* combine. +* +* RDEST0 (global input) INTEGER +* The process row to receive the answer. If RDEST0 = -1, +* every process in the scope gets the answer. +* +* CDEST0 (global input) INTEGER +* The process column to receive the answer. If CDEST0 = -1, +* every process in the scope gets the answer. +* +* SUBPTR (local input) Pointer to the subroutine to call to perform +* the required combine. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL BCAST, RSCOPE, CSCOPE + INTEGER CMSSG, DEST, DIST, HISDIST, I, IAM, MYCOL, + $ MYROW, MYDIST, MYDIST2, NP, NPCOL, NPROW, + $ RMSSG, TCDEST, TRDEST +* .. +* .. Local Arrays .. + DOUBLE PRECISION HIS( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL BLACS_GRIDINFO, DGEBR2D, DGEBS2D, + $ DGERV2D, DGESD2D +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* See if everyone wants the answer (need to broadcast the answer) +* + BCAST = ( ( RDEST0.EQ.-1 ).OR.( CDEST0.EQ.-1 ) ) + IF( BCAST ) THEN + TRDEST = 0 + TCDEST = 0 + ELSE + TRDEST = RDEST0 + TCDEST = CDEST0 + END IF +* +* Get grid parameters. +* + CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL ) +* +* Figure scope-dependant variables, or report illegal scope +* + RSCOPE = LSAME( SCOPE, 'R' ) + CSCOPE = LSAME( SCOPE, 'C' ) +* + IF( RSCOPE ) THEN + IF( BCAST ) THEN + TRDEST = MYROW + ELSE IF( MYROW.NE.TRDEST ) THEN + RETURN + END IF + NP = NPCOL + MYDIST = MOD( NPCOL + MYCOL - TCDEST, NPCOL ) + ELSE IF( CSCOPE ) THEN + IF( BCAST ) THEN + TCDEST = MYCOL + ELSE IF( MYCOL.NE.TCDEST ) THEN + RETURN + END IF + NP = NPROW + MYDIST = MOD( NPROW + MYROW - TRDEST, NPROW ) + ELSE IF( LSAME( SCOPE, 'A' ) ) THEN + NP = NPROW * NPCOL + IAM = MYROW*NPCOL + MYCOL + DEST = TRDEST*NPCOL + TCDEST + MYDIST = MOD( NP + IAM - DEST, NP ) + ELSE + RETURN + END IF +* + IF( NP.LT.2 ) + $ RETURN +* + MYDIST2 = MYDIST + RMSSG = MYROW + CMSSG = MYCOL + I = 1 +* + 10 CONTINUE +* + IF( MOD( MYDIST, 2 ).NE.0 ) THEN +* +* If I am process that sends information +* + DIST = I * ( MYDIST - MOD( MYDIST, 2 ) ) +* +* Figure coordinates of dest of message +* + IF( RSCOPE ) THEN + CMSSG = MOD( TCDEST + DIST, NP ) + ELSE IF( CSCOPE ) THEN + RMSSG = MOD( TRDEST + DIST, NP ) + ELSE + CMSSG = MOD( DEST + DIST, NP ) + RMSSG = CMSSG / NPCOL + CMSSG = MOD( CMSSG, NPCOL ) + END IF +* + CALL DGESD2D( ICTXT, N, 1, MINE, N, RMSSG, CMSSG ) +* + GO TO 20 +* + ELSE +* +* If I am a process receiving information, figure coordinates +* of source of message +* + DIST = MYDIST2 + I + IF( RSCOPE ) THEN + CMSSG = MOD( TCDEST + DIST, NP ) + HISDIST = MOD( NP + CMSSG - TCDEST, NP ) + ELSE IF( CSCOPE ) THEN + RMSSG = MOD( TRDEST + DIST, NP ) + HISDIST = MOD( NP + RMSSG - TRDEST, NP ) + ELSE + CMSSG = MOD( DEST + DIST, NP ) + RMSSG = CMSSG / NPCOL + CMSSG = MOD( CMSSG, NPCOL ) + HISDIST = MOD( NP + RMSSG*NPCOL+CMSSG - DEST, NP ) + END IF +* + IF( MYDIST2.LT.HISDIST ) THEN +* +* If I have anyone sending to me +* + CALL DGERV2D( ICTXT, N, 1, HIS, N, RMSSG, CMSSG ) + CALL SUBPTR( MINE, HIS ) +* + END IF + MYDIST = MYDIST / 2 +* + END IF + I = I * 2 +* + IF( I.LT.NP ) + $ GO TO 10 +* + 20 CONTINUE +* + IF( BCAST ) THEN + IF( MYDIST2.EQ.0 ) THEN + CALL DGEBS2D( ICTXT, SCOPE, ' ', N, 1, MINE, N ) + ELSE + CALL DGEBR2D( ICTXT, SCOPE, ' ', N, 1, MINE, N, + $ TRDEST, TCDEST ) + END IF + END IF +* + RETURN +* +* End of PDTREECOMB +* + END +* + SUBROUTINE DCOMBAMAX( V1, V2 ) +* +* -- ScaLAPACK tools routine (version 1.0) -- +* University of Tennessee, Knoxville, Oak Ridge National Laboratory, +* and University of California, Berkeley. +* February 28, 1995 +* +* .. Array Arguments .. + DOUBLE PRECISION V1( 2 ), V2( 2 ) +* .. +* +* Purpose +* ======= +* +* DCOMBAMAX finds the element having max. absolute value as well +* as its corresponding globl index. +* +* Arguments +* ========= +* +* V1 (local input/local output) DOUBLE PRECISION array of +* dimension 2. The first maximum absolute value element and +* its global index. V1(1) = AMAX, V1(2) = INDX. +* +* V2 (local input) DOUBLE PRECISION array of dimension 2. +* The second maximum absolute value element and its global +* index. V2(1) = AMAX, V2(2) = INDX. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( ABS( V1( 1 ) ).LT.ABS( V2( 1 ) ) ) THEN + V1( 1 ) = V2( 1 ) + V1( 2 ) = V2( 2 ) + END IF +* + RETURN +* +* End of DCOMBAMAX +* + END +* + SUBROUTINE DCOMBSSQ( V1, V2 ) +* +* -- ScaLAPACK tools routine (version 1.0) -- +* University of Tennessee, Knoxville, Oak Ridge National Laboratory, +* and University of California, Berkeley. +* February 28, 1995 +* +* .. Array Arguments .. + DOUBLE PRECISION V1( 2 ), V2( 2 ) +* .. +* +* Purpose +* ======= +* +* DCOMBSSQ does a scaled sum of squares on two scalars. +* +* Arguments +* ========= +* +* V1 (local input/local output) DOUBLE PRECISION array of +* dimension 2. The first scaled sum. V1(1) = SCALE, +* V1(2) = SUMSQ. +* +* V2 (local input) DOUBLE PRECISION array of dimension 2. +* The second scaled sum. V2(1) = SCALE, V2(2) = SUMSQ. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Executable Statements .. +* + IF( V1( 1 ).GE.V2( 1 ) ) THEN + IF( V1( 1 ).NE.ZERO ) + $ V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) + ELSE + V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) + V1( 1 ) = V2( 1 ) + END IF +* + RETURN +* +* End of DCOMBSSQ +* + END +* + SUBROUTINE DCOMBNRM2( X, Y ) +* +* -- ScaLAPACK tools routine (version 1.0) -- +* University of Tennessee, Knoxville, Oak Ridge National Laboratory, +* and University of California, Berkeley. +* February 28, 1995 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y +* .. +* +* Purpose +* ======= +* +* DCOMBNRM2 combines local norm 2 results, taking care not to cause +* unnecessary overflow. +* +* Arguments +* ========= +* +* X (local input) DOUBLE PRECISION +* Y (local input) DOUBLE PRECISION +* X and Y specify the values x and y. X and Y are supposed to +* be >= 0. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + W = MAX( X, Y ) + Z = MIN( X, Y ) +* + IF( Z.EQ.ZERO ) THEN + X = W + ELSE + X = W*SQRT( ONE+( Z / W )**2 ) + END IF +* + RETURN +* +* End of DCOMBNRM2 +* + END diff --git a/src/psblas/psb_chkglobvect.f90 b/src/psblas/psb_chkglobvect.f90 new file mode 100644 index 00000000..74a750a0 --- /dev/null +++ b/src/psblas/psb_chkglobvect.f90 @@ -0,0 +1,117 @@ +! File: psb_chkglobvect.f90 +! +! Subroutine: psb_chkglobvect +! psb_chkglobvect checks the validity of a descriptor vector desc_dec, the +! related global indexes ix, jx and the leading dimension lldx. +! If an inconsistency is found among its parameters ix, jx, +! descdec and lldx, the routine returns an error code in info. +! +! Parameters: +! m - integer. The number of rows of the dense matrix X being operated on. +! n - integer. The number of columns of the dense matrix X being operated on. +! lldx - integer. The leading dimension of the local dense matrix X. +! ix - integer. X's global row index, which points to the beginning +! of the dense submatrix which is to be operated on. +! jx - integer. X's global column index, which points to the beginning +! of the dense submatrix which is to be operated on. +! desc_dec - integer,dimension(:). Is the matrix_data array. +! info - integer. Eventually returns an error code. +! +subroutine psb_chkglobvect( m, n, lldx, ix, jx, desc_dec, info) + + use psb_error_mod + implicit none + + integer, intent(in) :: m,n,ix,jx,lldx + integer, intent(in) :: desc_dec(:) + integer, intent(out) :: info + + ! locals + integer :: err_act, int_err(5) + character(len=20) :: name, ch_err + + info=0 + name='psb_chkglobvect' + call psb_erractionsave(err_act) + + + if (m.lt.0) then + info=10 + int_err(1) = 1 + int_err(2) = m + else if (n.lt.0) then + info=10 + int_err(1) = 3 + int_err(2) = n + else if ((ix.lt.1) .and. (m.ne.0)) then + info=20 + int_err(1) = 4 + int_err(2) = ix + else if ((jx.lt.1) .and. (n.ne.0)) then + info=20 + int_err(1) = 5 + int_err(2) = jx + else if (desc_dec(psb_n_col_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_col_ + int_err(3) = desc_dec(psb_n_col_) + else if (desc_dec(psb_n_row_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_row_ + int_err(3) = desc_dec(psb_n_row_) + else if (lldx.lt.desc_dec(m_)) then + info=50 + int_err(1) = 3 + int_err(2) = lldx + int_err(3) = 6 + int_err(4) = psb_n_col_ + int_err(5) = desc_dec(psb_n_col_) + else if (desc_dec(n_).lt.m) then + info=60 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 6 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(n_).lt.ix) then + info=60 + int_err(1) = 4 + int_err(2) = ix + int_err(3) = 6 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(m_).lt.jx) then + info=60 + int_err(1) = 5 + int_err(2) = jx + int_err(3) = 6 + int_err(4) = m_ + int_err(5) = desc_dec(m_) + else if (desc_dec(n_).lt.(ix+m-1)) then + info=80 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = ix + end if + + if (info.ne.0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_chkvect diff --git a/src/psblas/psb_chkmat.f90 b/src/psblas/psb_chkmat.f90 new file mode 100644 index 00000000..b0a1bb4a --- /dev/null +++ b/src/psblas/psb_chkmat.f90 @@ -0,0 +1,138 @@ +! File: psb_chkmat.f90 +! File: psb_chkmat.f90 +! +! Subroutine: psb_chkmat +! pbmatvect checks the validity of a descriptor vector DESCDEC, the +! related global indexes IA, JA. It also computes the starting local +! indexes (IIA,JJA) corresponding to the submatrix starting globally at +! the entry pointed by (IA,JA). Finally, if an inconsitency is found among +! its parameters ia, ja and desc_A, the routine returns an error code in +! info. +! +! Parameters: +! m - integer. The number of rows of the matrix being operated on. +! n - integer. The number of columns of the matrix being operated on. +! ia - integer. a's global row index, which points to the beginning +! of the submatrix which is to be operated on. +! ja - integer. a's global column index, which points to the beginning +! of the submatrix which is to be operated on. +! desc_dec - integer,dimension(:). Is the matrix_data array. +! info - integer. Eventually returns an error code. +! iia - integer(optional). The local rows starting index of the submatrix. +! jja - integer(optional). The local columns starting index of the submatrix. +! +subroutine psb_chkmat( m, n, ia, ja, desc_dec, info, iia, jja) + + use psb_error_mod + implicit none + + integer, intent(in) :: m,n,ia,ja + integer, intent(in) :: desc_dec(:) + integer, intent(out) :: info + integer, optional :: iia, jja + + ! locals + integer :: err_act, int_err(5) + character(len=20) :: name, ch_err + + info=0 + name='psb_chkmat' + call psb_erractionsave(err_act) + + if (m.lt.0) then + info=10 + int_err(1) = 1 + int_err(2) = m + else if (n.lt.0) then + info=10 + int_err(1) = 3 + int_err(2) = n + else if ((ix.lt.1) .and. (m.ne.0)) then + info=20 + int_err(1) = 4 + int_err(2) = ix + else if ((jx.lt.1) .and. (n.ne.0)) then + info=20 + int_err(1) = 5 + int_err(2) = jx + else if (desc_dec(psb_n_col_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_col_ + int_err(3) = desc_dec(psb_n_col_) + else if (desc_dec(psb_n_row_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_row_ + int_err(3) = desc_dec(psb_n_row_) + else if (desc_dec(m_).lt.m) then + info=60 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 5 + int_err(4) = m_ + int_err(5) = desc_dec(m_) + else if (desc_dec(m_).lt.m) then + info=60 + int_err(1) = 2 + int_err(2) = n + int_err(3) = 5 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(m_).lt.ia) then + info=60 + int_err(1) = 3 + int_err(2) = ia + int_err(3) = 5 + int_err(4) = m_ + int_err(5) = desc_dec(m_) + else if (desc_dec(n_).lt.ja) then + info=60 + int_err(1) = 4 + int_err(2) = ja + int_err(3) = 5 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(m_).lt.(ia+m-1)) then + info=80 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 3 + int_err(4) = ia + else if (desc_dec(n_).lt.(ja+n-1)) then + info=80 + int_err(1) = 2 + int_err(2) = n + int_err(3) = 4 + int_err(4) = ja + end if + + if (info.ne.0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! Compute local indices for submatrix starting + ! at global indices ix and jx + if(present(iia).and.present(jja)) then + if (desc_dec(psb_n_row_).gt.0) then + iia=1 + jja=1 + else + iia=desc_dec(psb_n_row_)+1 + jja=desc_dec(psb_n_col_)+1 + end if + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return +end subroutine psb_chkmat diff --git a/src/psblas/psb_chkvect.f90 b/src/psblas/psb_chkvect.f90 new file mode 100644 index 00000000..8e934bd9 --- /dev/null +++ b/src/psblas/psb_chkvect.f90 @@ -0,0 +1,126 @@ +! File: psb_chkvect.f90 +! +! Subroutine: psb_chkvect +! psb_chkvect checks the validity of a descriptor vector desc_dec, the +! related global indexes ix, jx and the leading dimension lldx. It also +! eventually computes the starting local indexes (iix,jjx) corresponding +! to the submatrix starting globally at the entry pointed by (ix,jx). +! Finally, if an inconsistency is found among its parameters ix, jx, +! descdec and lldx, the routine returns an error code in info. +! +! Parameters: +! m - integer. The number of rows of the dense matrix X being operated on. +! n - integer. The number of columns of the dense matrix X being operated on. +! lldx - integer. The leading dimension of the local dense matrix X. +! ix - integer. X's global row index, which points to the beginning +! of the dense submatrix which is to be operated on. +! jx - integer. X's global column index, which points to the beginning +! of the dense submatrix which is to be operated on. +! desc_dec - integer,dimension(:). Is the matrix_data array. +! info - integer. Eventually returns an error code. +! iix - integer(optional). The local rows starting index of the submatrix. +! jjx - integer(optional). The local columns starting index of the submatrix. +subroutine psb_chkvect( m, n, lldx, ix, jx, desc_dec, info, iix, jjx) + + use psb_error_mod + implicit none + + integer, intent(in) :: m,n,ix,jx,lldx + integer, intent(in) :: desc_dec(:) + integer, intent(out) :: info + integer, optional :: iix, jjx + + ! locals + integer :: err_act, int_err(5) + character(len=20) :: name, ch_err + + info=0 + name='psb_chkvect' + call psb_erractionsave(err_act) + + + if (m.lt.0) then + info=10 + int_err(1) = 1 + int_err(2) = m + else if (n.lt.0) then + info=10 + int_err(1) = 3 + int_err(2) = n + else if ((ix.lt.1) .and. (m.ne.0)) then + info=20 + int_err(1) = 4 + int_err(2) = ix + else if ((jx.lt.1) .and. (n.ne.0)) then + info=20 + int_err(1) = 5 + int_err(2) = jx + else if (desc_dec(psb_n_col_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_col_ + int_err(3) = desc_dec(psb_n_col_) + else if (desc_dec(psb_n_row_).lt.0) then + info=40 + int_err(1) = 6 + int_err(2) = psb_n_row_ + int_err(3) = desc_dec(psb_n_row_) + else if (lldx.lt.desc_dec(psb_n_col_)) then + info=50 + int_err(1) = 3 + int_err(2) = lldx + int_err(3) = 6 + int_err(4) = psb_n_col_ + int_err(5) = desc_dec(psb_n_col_) + else if (desc_dec(n_).lt.m) then + info=60 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 6 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(n_).lt.ix) then + info=60 + int_err(1) = 4 + int_err(2) = ix + int_err(3) = 6 + int_err(4) = n_ + int_err(5) = desc_dec(n_) + else if (desc_dec(m_).lt.jx) then + info=60 + int_err(1) = 5 + int_err(2) = jx + int_err(3) = 6 + int_err(4) = m_ + int_err(5) = desc_dec(m_) + else if (desc_dec(n_).lt.(ix+m-1)) then + info=80 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = ix + end if + + if (info.ne.0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! Compute local indices for submatrix starting + ! at global indices ix and jx + if(present(iix)) iix=ix ! (for our applications iix=ix)) + if(present(jjx)) iix=ix ! (for our applications jjx=jx)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_chkvect diff --git a/src/psblas/psb_damax.f90 b/src/psblas/psb_damax.f90 new file mode 100644 index 00000000..d2821b2b --- /dev/null +++ b/src/psblas/psb_damax.f90 @@ -0,0 +1,415 @@ +! File: psb_damax.f90 +! +! Function: psb_damax +! Searches the absolute max of X. +! +! normi := max(abs(sub(X)(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Parameters: +! x - real,dimension(:,:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset. +! +function psb_damax (x,desc_a, info, jx) + + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, optional, intent(in) :: jx + real(kind(1.d0)) :: psb_damax + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_damax' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + amax=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_data(m_) + + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + ! compute local max + if ((desc_data(psb_n_row_).gt.0).and.(m.ne.0)) then + tmpx => x(iix:,jjx) + imax=idamax(desc_data(n_row),tmpx,1) + amax=abs(tmpx(imax)) + end if + + ! compute global max + call dgamx2d(icontxt, 'A', ' ', ione, ione, amax, ione,& + &temp ,temp,-ione ,-ione,-ione) + + psb_damax=amax + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_damax + + + + +! Function: psb_damaxv +! Searches the absolute max of X. +! +! normi := max(abs(X(i)) +! +! Parameters: +! x - real,dimension(:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +function psb_damaxv (x,desc_a, info) + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)) :: psb_damax + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ix, jx, temp(2) + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_damaxv' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + amax=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx = 1 + + m = desc_data(m_) + + call psb_chkvect(m,1,size(x,1),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((desc_data(psb_n_row_).gt.0).and.(m.ne.0)) then + tmpx => x(iix:,jjx) + imax=idamax(desc_data(n_row),tmpx,1) + amax=abs(tmpx(imax)) + end if + + ! compute global max + call dgamx2d(icontxt, 'A', ' ', ione, ione, amax, ione,& + &temp ,temp,-ione ,-ione,-ione) + + psb_damaxv=amax + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_damaxv + + +! Subroutine: psb_damaxvs +! Searches the absolute max of X. +! +! normi := max(abs(sub(X)(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Parameters: +! res - real. The result. +! x - real,dimension(:,:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset. +! +subroutine psb_damaxvs (res,x,desc_a, info, jx) + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, optional, intent(in) :: jx + real(kind(1.D0)), intent(out) :: res + real(kind(1.d0)) :: psb_damax + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)) :: locmax(2), amax + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_damaxvs' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + amax=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_data(m_) + + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((desc_data(psb_n_row_).gt.0).and.(m.ne.0)) then + tmpx => x(iix:,jjx) + imax=idamax(desc_data(n_row),tmpx,1) + amax=abs(tmpx(imax)) + end if + + ! compute global max + call dgamx2d(icontxt, 'A', ' ', ione, ione, amax, ione,& + &temp ,temp,-ione ,-ione,-ione) + + res = amax + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_damaxvs + + + + +! Subroutine: psb_dmamaxs +! Searches the absolute max of X. +! +! normi := max(abs(X(i)) +! +! Parameters: +! res - real. The result. +! x - real,dimension(:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_dmamaxs (res,x,desc_a, info) + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(out) :: res(:) + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ix, jx, temp(2) + real(kind(1.d0)) :: locmax(2) + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dmamaxs' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + amax=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx = 1 + + m = desc_data(m_) + k = min(size(x,2),size(res,1)) + + call psb_chkvect(m,1,size(x,1),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((desc_data(psb_n_row_).gt.0).and.(m.ne.0)) then + do i=1,k + tmpx => x(iix:,i) + imax=idamax(desc_data(n_row),tmpx,1) + res(i)=abs(tmpx(imax)) + end do + end if + + ! compute global max + call dgamx2d(icontxt, 'A', ' ', ione, ione, amax, ione,& + &temp ,temp,-ione ,-ione,-ione) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dmamaxs diff --git a/src/psblas/psb_dasum.f90 b/src/psblas/psb_dasum.f90 new file mode 100644 index 00000000..645dce06 --- /dev/null +++ b/src/psblas/psb_dasum.f90 @@ -0,0 +1,368 @@ +! File: psb_dasum.f90 +! +! Function: psb_dasum +! Computes norm1 of X +! +! norm1 := sum(sub( X )(i)) +! +! where sub( X ) denotes X(1:N,JX:). +! +! Parameters: +! x - real,dimension(:,:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset. +! +function psb_dasum (x,desc_a, info, jx) + + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer, optional, intent(in) :: jx + real(kind(1.d0)) :: psb_dasum + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)) :: asum + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dasum' + info=0 + call psb_erractionsave(err_act) + + asum=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_data(m_) + + ! check vector correctness + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((m.ne.0)) then + if(desc_data(psb_n_row_).gt.0) then + tmpx => x(iix:,jjx) + asum=dasum(desc_data(n_row),tmpx,ione) + + ! adjust asum because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + asum = asum -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & tmpx(desc_a%ovrlap_elem(i)) + i = i+2 + end do + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + + else + asum=0.d0 + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + end if + else + asum=0.d0 + end if + + + psb_dasum=asum + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_dasum + + +! Function: psb_dasumv +! Computes norm1 of X +! +! norm1 := sum(X(i)) +! +! Parameters: +! x - real,dimension(:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +function psb_dasumv (x,desc_a, info) + + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)) :: psb_dasumv + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)) :: asum + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dasumv' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + asum=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx = 1 + + m = desc_data(m_) + + ! check vector correctness + call psb_chkvect(m,1,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((m.ne.0)) then + if(desc_data(psb_n_row_).gt.0) then + tmpx => x(:) + asum=dasum(desc_data(n_row),tmpx,ione) + + ! adjust asum because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + asum = asum -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & tmpx(desc_a%ovrlap_elem(i)) + i = i+2 + end do + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + + else + asum=0.d0 + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + end if + else + asum=0.d0 + end if + + + psb_dasumv=asum + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_dasum + + +! Subroutine: psb_dasum vs +! Computes norm1 of X +! +! norm1 := sum(X(i)) +! +! Parameters: +! res - real. The result. +! x - real,dimension(:). The input vector. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset. +! +subroutine psb_dasumvs (res,x,desc_a, info) + use psb_serial_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + real(kind(1.d0)), intent(out) :: res + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)) :: asum + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dasumvs' + info=0 + call psb_erractionsave(err_act) + + locmax(:)=0.d0 + asum=0.d0 + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx = 1 + + m = desc_data(m_) + + ! check vector correctness + call psb_chkvect(m,1,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + end if + + err=info + call psb_errcomm(icontxt,err) + if(err.ne.0) goto 9999 + + ! compute local max + if ((m.ne.0)) then + if(desc_data(psb_n_row_).gt.0) then + tmpx => x(:) + asum=dasum(desc_data(n_row),tmpx,ione) + + ! adjust asum because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + asum = asum -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & tmpx(desc_a%ovrlap_elem(i)) + i = i+2 + end do + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + + else + asum=0.d0 + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, asum,& + & ione, mone ,mycol) + end if + else + asum=0.d0 + end if + + + res = asum + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dasumvs + + + diff --git a/src/psblas/psb_daxpby.f90 b/src/psblas/psb_daxpby.f90 new file mode 100644 index 00000000..3eeba1bf --- /dev/null +++ b/src/psblas/psb_daxpby.f90 @@ -0,0 +1,220 @@ +! File: psb_daxpby.f90 +! +! Subroutine: psb_daxpby +! Adds one distributed matrix to another, +! +! sub( Y ) := beta * sub( Y ) + alpha * sub( X ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Parameters: +! alpha - real. The scalar used to multiply each component of sub( X ). +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! beta - real. The scalar used to multiply each component of sub( Y ). +! y - real,dimension(:,:). The input vector containing the entries of sub( Y ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! +subroutine psb_daxpby(alpha, x, beta,y,desc_a,info, n, jx, jy) + use psb_descriptor_type + use psb_error_mod + implicit none + + integer, intent(in), optional :: n, jx, jy + integer, intent(out) :: info + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.D0)), intent(in) :: x(:,:) + real(kind(1.D0)), intent(inout) :: y(:,:) + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_daxpby' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + if (present(jx)) then + ijx = jx + else + ijx = ione + endif + + iy = ione + if (present(jy)) then + ijy = jy + else + ijy = ione + endif + + if (present(n)) then + if(((ijx+n).le.size(x,2)).and.& + & ((ijy+n).le.size(y,2))) then + in = n + else + in = min(size(x,2),size(y,2)) + end if + else + in = min(size(x,2),size(y,2)) + endif + + if(ijx.ne.ijy) then + info=3050 + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if ((in.ne.0)) then + if(desc_data(psb_n_row_).gt.0) then + call daxpby(desc_a%matrix_data(psb_n_col_),in,& + & alpha,x(iix,jjx),size(x,1),beta,& + & y(iiy,jjy),size(y,1),info) + end if + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_daxpby + + + + + +! +! Subroutine: psb_daxpbyv +! Adds one distributed matrix to another, +! +! Y := beta * Y + alpha * X +! +! Parameters: +! alpha - real. The scalar used to multiply each component of X. +! x - real,dimension(:). The input vector containing the entries of X. +! beta - real. The scalar used to multiply each component of Y. +! y - real,dimension(:). The input vector containing the entries of Y. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_psdaxpbyv(alpha, x, beta,y,desc_a,info) + use psb_descriptor_type + use psb_error_mod + implicit none + + integer, intent(out) :: info + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.D0)), intent(in) :: x(:) + real(kind(1.D0)), intent(inout) :: y(:) + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + character(len=20) :: name, ch_err + + name='psb_daxpby' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x),ix,ione,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y),iy,ione,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + end if + + if ((in.ne.0)) then + if(desc_data(psb_n_row_).gt.0) then + call daxpby(desc_a%matrix_data(psb_n_col_),ione,& + & alpha,x,size(x),beta,& + & y,size(y),info) + end if + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_psdaxpbyv diff --git a/src/psblas/psb_ddot.f90 b/src/psblas/psb_ddot.f90 new file mode 100644 index 00000000..28e26f62 --- /dev/null +++ b/src/psblas/psb_ddot.f90 @@ -0,0 +1,470 @@ +! File: psb_ddot.f90 +! +! Function: psb_ddot +! psb_ddot forms the dot product of two distributed vectors, +! +! dot := sub( X )**T * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Parameters: +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! y - real,dimension(:,:). The input vector containing the entries of sub( Y ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset for sub( X ). +! jy - integer(optional). The column offset for sub( Y ). +! +function psb_ddot(x, y,desc_a, info, jx, jy) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:), y(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(in), optional :: jx, jy + integer, intent(out) :: info + real(kind(1.D0)) :: f90_psddot + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)),pointer :: tmpx(:) + real(kind(1.D0)) :: dot_local + character(len=20) :: name, ch_err + + name='psb_ddot' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + if (present(jx)) then + ijx = jx + else + ijx = ione + endif + + iy = ione + if (present(jy)) then + ijy = jy + else + ijy = ione + endif + + if(ijx.ne.ijy) then + info=3050 + call psb_errpush(info,name) + goto 9999 + end if + + m = desc_a%matrix_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if(desc_a%matrix_data(psb_n_row_).gt.0) then + dot = ddot(desc_a%matrix_data(psb_n_row_),& + & x(iix,jjx),ione,y(iiy,jjy),ione) + ! adjust dot because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + dot = dot -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & x(iix+desc_a%ovrlap_elem(i)-1,jjx)* + & y(iiy+desc_a%ovrlap_elem(i)-1,jjy) + i = i+2 + end do + else + dot=0.d0 + end if + else + dot=0.d0 + end if + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, dot,& + & ione, mone ,mycol) + + psb_ddot = dot + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_ddot + + + + +! Function: psb_ddotv +! psb_ddot forms the dot product of two distributed vectors, +! +! dot := X**T * Y +! +! Parameters: +! x - real,dimension(:). The input vector containing the entries of X. +! y - real,dimension(:). The input vector containing the entries of Y. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +function psb_ddotv(x, y,desc_a, info) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:), y(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.D0)) :: psb_ddotv + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)),pointer :: tmpx(:) + real(kind(1.D0)) :: dot_local + character(len=20) :: name, ch_err + + name='psb_ddot' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + m = desc_a%matrix_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if(desc_a%matrix_data(psb_n_row_).gt.0) then + dot = ddot(desc_a%matrix_data(psb_n_row_),& + & x,ione,y,ione) + ! adjust dot because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + dot = dot -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & x(desc_a%ovrlap_elem(i))* + & y(desc_a%ovrlap_elem(i)) + i = i+2 + end do + else + dot=0.d0 + end if + else + dot=0.d0 + end if + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, dot,& + & ione, mone ,mycol) + + psb_ddotv = dot + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_ddotv + + + +! Subroutine: psb_ddotvs +! psb_ddot forms the dot product of two distributed vectors, +! +! dot := X**T * Y +! +! Parameters: +! res - real. The result. +! x - real,dimension(:). The input vector containing the entries of X. +! y - real,dimension(:). The input vector containing the entries of Y. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_ddotvs(res, x, y,desc_a, info) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:), y(:) + real(kind(1.d0)), intent(out) :: res + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)),pointer :: tmpx(:) + real(kind(1.D0)) :: dot_local + character(len=20) :: name, ch_err + + name='psb_ddot' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + m = desc_a%matrix_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if(desc_a%matrix_data(psb_n_row_).gt.0) then + dot = ddot(desc_a%matrix_data(psb_n_row_),& + & x,ione,y,ione) + ! adjust dot because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + dot = dot -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & x(desc_a%ovrlap_elem(i))* + & y(desc_a%ovrlap_elem(i)) + i = i+2 + end do + else + dot=0.d0 + end if + else + dot=0.d0 + end if + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, dot,& + & ione, mone ,mycol) + + res = dot + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_ddotvs + + + + +! Subroutine: psb_dmdots +! psb_ddot forms the dot product of two distributed vectors, +! +! dot := sub( X )**T * sub( Y ) +! +! where sub( X ) denotes X(:,JX) +! +! sub( Y ) denotes Y(:,JY). +! +! Parameters: +! res - real. The result. +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! y - real,dimension(:,:). The input vector containing the entries of sub( Y ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_dmdots(res, x, y, desc_a, info) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:), y(:,:) + real(kind(1.d0)), intent(out) :: res(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2) + real(kind(1.d0)),pointer :: dot(:) + real(kind(1.D0)) :: dot_local + character(len=20) :: name, ch_err + + name='psb_dmdots' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -ione) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= ione) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%matrix_data(m_) + + ! check vector correctness + call psb_chkvect(m,ione,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ione,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iix.ne.ione).or.(iiy.ne.ione)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + k = min(size(x,2),size(y,2)) + allocate(dot(k)) + + if(m.ne.0) then + if(desc_a%matrix_data(psb_n_row_).gt.0) then + do j=1,k + dot(j) = ddot(desc_a%matrix_data(psb_n_row_),& + & x(iix,jjx+j-1),ione,y(iiy,jjy+j-1),ione) + ! adjust dot because overlapped elements are computed more than once + i=1 + do while (desc_a%ovrlap_elem(i).ne.-ione) + dot(j) = dot(j) -& + & (desc_a%ovrlap_elem(i+1)-1)/desc_a%ovrlap_elem(i+1)*& + & x(iix+desc_a%ovrlap_elem(i)-1,jjx+j-1)* + & y(iiy+desc_a%ovrlap_elem(i)-1,jjy+j-1) + i = i+2 + end do + end do + else + dot(:)=0.d0 + end if + else + dot(:)=0.d0 + end if + + ! compute global sum + call dgsum2d(icontxt, 'A', ' ', ione, ione, dot,& + & ione, mone ,mycol) + + res(1:k) = dot(1:k) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dmdots diff --git a/src/psblas/psb_dnrm2.f90 b/src/psblas/psb_dnrm2.f90 new file mode 100644 index 00000000..63a23a87 --- /dev/null +++ b/src/psblas/psb_dnrm2.f90 @@ -0,0 +1,329 @@ +! File: psb_dnrm2.f90 +! +! Function: psb_dnrm2 +! Forms the norm2 of a distributed vector, +! +! norm2 := sqrt ( sub( X )**T * sub( X ) ) +! +! where sub( X ) denotes X(:,JX). +! +! Parameters: +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! jx - integer(optional). The column offset for sub( X ). +! +function psb_dnrm2(x, desc_a, info, jx) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(in), optional :: jx + integer, intent(out) :: info + real(kind(1.D0)) :: psb_dnrm2 + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2), ndim + real(kind(1.d0)) :: nrm2 + real(kind(1.d0)),pointer :: tmpx(:) + external dcombnrm2 + character(len=20) :: name, ch_err + + name='psb_dnrm2' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + m = desc_data(m_) + + call psb_chkvect(m,1,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if (desc_a%matrix_data(psb_n_row_) .gt. 0) then + ndim = desc_a%matrix_data(psb_n_row_) + nrm2 = dnrm2( ndim, x(iix,jjx), ione ) + i=1 + do while (desc_a%ovrlap_elem(i) .ne. -1) + id = desc_a%ovrlap_elem(i+n_dom_ovr_) + dd = dble(id-1)/dble(id) + nrm2 = nrm2 * sqrt(& + & one - dd * ( & + & x(desc_a%ovrlap_elem(i+ovrlp_elem_), jjx) & + & / nrm2 & + & ) ** 2 & + & ) + i = i+2 + end do + else + nrm2 = zero + end if + else + nrm2 = zero + end if + + call pdtreecomb(icontxt,'All',1,nrm2,-1,-1,dcombnrm2) + + psb_dnrm2 = nrm2 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_dnrm2 + + + +! Function: psb_dnrm2 +! Forms the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**T * X) +! +! Parameters: +! x - real,dimension(:). The input vector containing the entries of X. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +function psb_dnrm2v(x, desc_a, info) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.D0)) :: psb_dnrm2v + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2), ndim + real(kind(1.d0)) :: nrm2 + real(kind(1.d0)),pointer :: tmpx(:) + external dcombnrm2 + character(len=20) :: name, ch_err + + name='psb_dnrm2v' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx=1 + + m = desc_data(m_) + + call psb_chkvect(m,1,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if (desc_a%matrix_data(psb_n_row_) .gt. 0) then + ndim = desc_a%matrix_data(psb_n_row_) + nrm2 = dnrm2( ndim, x, ione ) + i=1 + do while (desc_a%ovrlap_elem(i) .ne. -1) + id = desc_a%ovrlap_elem(i+n_dom_ovr_) + dd = dble(id-1)/dble(id) + nrm2 = nrm2 * sqrt(& + & one - dd * ( & + & x(desc_a%ovrlap_elem(i+ovrlp_elem_)) & + & / nrm2 & + & ) ** 2 & + & ) + i = i+2 + end do + else + nrm2 = zero + end if + else + nrm2 = zero + end if + + call pdtreecomb(icontxt,'All',1,nrm2,-1,-1,dcombnrm2) + + psb_dnrm2v = nrm2 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_dnrm2v + + + + +! Subroutine: psb_dnrm2 +! Forms the norm2 of a distributed vector, +! +! norm2 := sqrt ( X**T * X) +! +! Parameters: +! res - real. The result. +! x - real,dimension(:). The input vector containing the entries of X. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_dnrm2vs(res, x, desc_a, info) + use psb_descriptor_type + use psb_error_mod + implicit none + + real(kind(1.d0)), intent(in) :: x(:) + real(kind(1.d0)), intent(out) :: res + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, temp(2), ndim + real(kind(1.d0)) :: nrm2 + real(kind(1.d0)),pointer :: tmpx(:) + external dcombnrm2 + character(len=20) :: name, ch_err + + name='psb_dnrm2' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ix = 1 + jx = 1 + m = desc_data(m_) + + call psb_chkvect(m,1,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + end if + + if (iix.ne.1) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(m.ne.0) then + if (desc_a%matrix_data(psb_n_row_) .gt. 0) then + ndim = desc_a%matrix_data(psb_n_row_) + nrm2 = dnrm2( ndim, x, ione ) + i=1 + do while (desc_a%ovrlap_elem(i) .ne. -1) + id = desc_a%ovrlap_elem(i+n_dom_ovr_) + dd = dble(id-1)/dble(id) + nrm2 = nrm2 * sqrt(& + & one - dd * ( & + & x(desc_a%ovrlap_elem(i+ovrlp_elem_)) & + & / nrm2 & + & ) ** 2 & + & ) + i = i+2 + end do + else + nrm2 = zero + end if + else + nrm2 = zero + end if + + call pdtreecomb(icontxt,'All',1,nrm2,-1,-1,dcombnrm2) + + res = nrm2 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dnrm2vs diff --git a/src/psblas/psb_dnrmi.f90 b/src/psblas/psb_dnrmi.f90 new file mode 100644 index 00000000..0881c23f --- /dev/null +++ b/src/psblas/psb_dnrmi.f90 @@ -0,0 +1,101 @@ +! File: psb_dnrmi.f90 +! +! Function: psb_dnrmi +! Forms the approximated norm of a sparse matrix, +! +! normi := max(abs(sum(A(i,j)))) +! +! Parameters: +! a - type(). The sparse matrix containing A. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +function psb_dnrmi(a,desc_a,info) + use psb_descriptor_type + use psb_serial_mod + use psb_error_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer, intent(out) :: info + type(psb_desc_type), intent(in) :: desc_a + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iia, jja, ia, ja, temp(2) + real(kind(1.d0)) :: nrmi + character(len=20) :: name, ch_err + + name='psb_dnrmi' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ia = 1 + ja = 1 + m = desc_a%matrix_data(m_) + n = desc_a%matrix_data(n_) + + call psb_chkmat(m,n,ia,ja,desc_a%matrix_data,info,iia,jja) + if(info.ne.0) then + info=4010 + ch_err='psb_chkmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if ((iia.ne.1).or.(jja.ne.1)) then + info=3040 + call psb_errpush(info,name) + goto 9999 + end if + + if ((m.ne.0).and.(n.ne.0)) then + mdim = desc_a%matrix_data(psb_n_row_) + ndim = desc_a%matrix_data(psb_n_col_) + nrmi = dcsnmi('N',mdim,ndim,a%fida,& + & a%descra,a%aspk,a%ia1,a%ia2,& + & a%infoa,info) + + if(info.ne.0) then + info=4010 + ch_err='dcsnmi' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! compute global max + call dgamx2d(icontxt, 'A', ' ', ione, ione, nrmi, ione,& + &temp ,temp,-ione ,-ione,-ione) + else + nrmi = 0.d0 + end if + + psb_nrmi = nrmi + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end function psb_dnrmi diff --git a/src/psblas/psb_dspmm.f90 b/src/psblas/psb_dspmm.f90 new file mode 100644 index 00000000..3f3e75a8 --- /dev/null +++ b/src/psblas/psb_dspmm.f90 @@ -0,0 +1,606 @@ +! File: psb_dspmm.f90 +! +! Subroutine: psb_dspmm +! Performs one of the distributed matrix-vector operations +! +! sub( Y ) := alpha * Pr * A * Pc * sub( X ) + beta * sub( Y ), or +! +! sub( Y ) := alpha * Pr * A' * Pr * sub( X ) + beta * sub( Y ), +! +! where: +! +! sub( X ) denotes *if* TRANS = 'N', +! +! X(1:N,JX:JX+K-1), +! +! *else* +! +! X(1:M,JX:JX+K-1). +! +! *end if* +! +! sub( Y ) denotes *if* trans = 'N', +! +! Y(1:M,JY:JY+K-1), +! +! *else* +! +! Y(1:N,JY:JY+K-1) +! +! *end* *if* +! +! alpha and beta are scalars, and sub( X ) and sub( Y ) are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Parameters: +! alpha - real. The scalar alpha. +! a - type(). The sparse matrix containing A. +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! beta - real. The scalar beta. +! y - real,dimension(:,:). The input vector containing the entries of sub( Y ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! k - integer(optional). The number of right-hand sides. +! jx - integer(optional). The column offset for sub( X ). If not present 1 is assumed. +! jy - integer(optional). The column offset for sub( Y ). If not present 1 is assumed. +! work - real,dimension(:)(optional). Working area. +! doswap - integer(optional). Whether to performe halo updates. +! +subroutine psb_dspmm(alpha,a,x,beta,y,desc_a,info,& + & trans, k, jx, jy, work, doswap) + + use psb_dspmat_type + use psb_serial_mod + use psb_descriptor_type + use psb_comm_mod + use psi_mod + use psb_error_mod + implicit none + + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.d0)), intent(inout) :: x(:,:) + real(kind(1.d0)), intent(inout) :: y(:,:) + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional :: work(:) + character, intent(in), optional :: trans + integer, intent(in), optional :: k, jx, jy,doswap + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ia, ja, iia, jja, temp(2) + integer, parameter :: nb=4 + real(kind(1.d0)),pointer :: tmpx(:), xp(:,:), yp(:,:) + character(len=20) :: name, ch_err + + name='psb_dspmm' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ia = 1 + ja = 1 + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + iy = 1 + if (present(jy)) then + ijy = jy + else + ijy = 1 + endif + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = 1 + endif + + if (present(k)) then + ik = min(k,size(x,2)-ijx+1) + ik = min(ik,size(y,2)-ijy+1) + else + ik = min(size(x,2)-ijx+1,size(y,2)-ijy+1) + endif + + if (present(trans)) then + if((trans.eq.'N').or.(trans.eq.'T')) then + itrans = trans + else if (trans.eq.'C') then + info = 3020 + call psb_errpush(info,name) + goto 9999 + else + info = 70 + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_data(m_) + n = desc_data(n_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + lldx = size(x,1) + lldy = size(y,1) + + ! check for presence/size of a work area + liwork= 2*ncol + if (a%pr(1) /= 0) llwork = liwork + n * ik + if (a%pl(1) /= 0) llwork = llwork + m * ik + if (present(work)) then + if(size(work).lt.liwork) then + call psb_realloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psb_realloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork(1)=0.d0 + + ! checking for matrix correctness + call psb_chkmat(m,n,ia,ja,desc_a%matrix_data,info,iia,jja) + if(info.ne.0) then + info=4010 + ch_err='psb_chkmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + if (itrans.eq.'N') then + ! Matrix is not transposed + if((ja.ne.ix).or.(ia.ne.iy)) then + ! this case is not yet implemented + info = 3030 + call psb_errpush(info,name) + goto 9999 + end if + + ! checking for vectors correctness + call psb_chkvect(n,ik,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ik,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(doswap_.lt.0) x(nrow:ncol,1:ik)=0.d0 + + ib1=min(nb,ik) + xp => x(iix:lldx,jjx:jjx+ib1-1) + if(doswap_.gt.0)& + & call psi_swapdata(ior(SWAP_SEND,SWAP_RECV),& + & ib1,dzero,xp,desc_a,iwork,info) +!!$ & call PSI_dSwapData(ior(SWAP_SEND,SWAP_RECV),ib1,& +!!$ & dzero,x(iix,jjx),lldx,desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + + + blk: do i=1, ik, nb + ib=ib1 + ib1 = max(0,min(nb,(ik)-(i-1+ib))) + xp => x(iix:lldx,jjx+i+ib-1:jjx+i+ib+ib1-2) + if((ib1.gt.0).and.(doswap_.gt.0))& + & call psi_swapdata(SWAP_SEND_,ib1,& + & dzero,xp,desc_a,iwork,info) +!!$ & call PSI_dSwapData(SWAP_SEND,ib1,& +!!$ & dzero,x(iix,jjx+i+ib-1),lldx,desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + if(info.ne.0) exit blk + + ! local Matrix-vector product + call dcsmm(itran,nrow,ib,ncol,alpha,a%pr,a%fida,& + & a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pl,& + & x(iix,jjx+i-1),lldx,beta,y(iiy,jjy+i-1),lldy,& + & iwork,liwork,info) + if(info.ne.0) exit blk + + if((ib1.gt.0).and.(doswap_.gt.0))& + & call psi_swapdata(SWAP_SEND_,ib1,& + & dzero,xp,desc_a,iwork,info) +!!$ & call PSI_dSwapData(SWAP_RECV,ib1,& +!!$ & dzero,x(iix,jjx+i+ib-1),lldx,desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + if(info.ne.0) exit blk + end do + + if(info.ne.0) then + info = 4011 + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + if((ja.ne.iy).or.(ia.ne.ix)) then + ! this case is not yet implemented + info = 3030 + call psb_errpush(info,name) + goto 9999 + end if + + if(desc_as%ovrlap_elem(1).ne.-1) then + info = 3070 + call psb_errpush(info,name) + goto 9999 + end if + + ! checking for vectors correctness + call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(n,ik,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(doswap_.lt.0) y(nrow:ncol,1:ik)=0.d0 + + ! local Matrix-vector product + call dcsmm(itran,ncol,ik,nrow,alpha,a%pr,a%fida,& + & a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pl,& + & x(iix,jjx),lldx,beta,y(iiy,jjy),lldy,& + & iwork,liwork,info) + if(info.ne.0) then + info = 4010 + ch_err='dcsmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + yp => y(iiy:lldy,jjy:jjy+ik-1) + if(doswap_.gt.0)& + & call psi_swaptran(ior(SWAP_SEND,SWAP_RECV),& + & ik,done,yp,desc_a,iwork,info) +!!$ & call PSI_dSwapTran(ior(SWAP_SEND,SWAP_RECV),& +!!$ & ik,done,y(iiy,jjy),lldy,desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + if(info.ne.0) then + info = 4010 + ch_err='PSI_dSwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + end if + + if(.not.present(work)) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dspmm + + + + +! Subroutine: psb_dspmmv +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A' * Pr * X + beta * Y, +! +! alpha and beta are scalars, and X and Y are distributed +! vectors and A is a M-by-N distributed matrix. +! +! Parameters: +! alpha - real. The scalar alpha. +! a - type(). The sparse matrix containing A. +! x - real,dimension(:). The input vector containing the entries of X. +! beta - real. The scalar beta. +! y - real,dimension(:. The input vector containing the entries of Y. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! work - real,dimension(:)(optional). Working area. +! doswap - integer(optional). Whether to performe halo updates. +! +subroutine psb_dspmv(alpha,a,x,beta,y,desc_a,info,& + & trans, work, doswap) + + use psb_dspmat_type + use psb_serial_mod + use psb_descriptor_type + use psb_comm_mod + use psi_mod + use psb_error_mod + implicit none + + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.d0)), intent(inout) :: x(:) + real(kind(1.d0)), intent(inout) :: y(:) + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(inout), optional :: work(:) + character, intent(in), optional :: trans + integer, intent(in), optional :: doswap + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ia, ja, iia, jja, temp(2) + integer, parameter :: nb=4 + real(kind(1.d0)),pointer :: tmpx(:) + character(len=20) :: name, ch_err + + name='psb_dspmv' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ia = 1 + ja = 1 + ix = 1 + jx = 1 + iy = 1 + jy = 1 + ik = 1 + + if (present(doswap)) then + doswap_ = doswap + else + doswap_ = 1 + endif + + if (present(trans)) then + if((trans.eq.'N').or.(trans.eq.'T')) then + itrans = trans + else if (trans.eq.'C') then + info = 3020 + call psb_errpush(info,name) + goto 9999 + else + info = 70 + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_data(m_) + n = desc_data(n_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + lldx = size(x,1) + lldy = size(y,1) + + ! check for presence/size of a work area + liwork= 2*ncol + if (a%pr(1) /= 0) llwork = liwork + n * ik + if (a%pl(1) /= 0) llwork = liwork + m * ik + if (present(work)) then + if(size(work).lt.liwork) then + call psb_realloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psb_realloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + + ! checking for matrix correctness + call psb_chkmat(m,n,ia,ja,desc_a%matrix_data,info,iia,jja) + if(info.ne.0) then + info=4010 + ch_err='psb_chkmat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + if (itrans.eq.'N') then + ! Matrix is not transposed + if((ja.ne.ix).or.(ia.ne.iy)) then + ! this case is not yet implemented + info = 3030 + call psb_errpush(info,name) + goto 9999 + end if + + ! checking for vectors correctness + call psb_chkvect(n,ik,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ik,size(y),iy,jy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + call psb_errpush(info,name) + goto 9999 + end if + + if(doswap_.lt.0) then + x(nrow:ncol,1:ik)=0.d0 + else + call psi_swapdata(ior(SWAP_SEND,SWAP_RECV),& + & dzero,xp,desc_a,iwork,info) +!!$ call PSI_dSwapData(ior(SWAP_SEND,SWAP_RECV),1,& +!!$ & dzero,x(iix,jjx),lldx,desc_a%matrix_data,& +!!$ & desc_a%halo_index,iwork,liwork,info) + end if + + ! local Matrix-vector product + call dcsmm(itran,nrow,ib,ncol,alpha,a%pr,a%fida,& + & a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pl,& + & xp(iix),lldx,beta,yp(iiy),lldy,& + & iwork,liwork,info) + if(info.ne.0) exit blk + + + if(info.ne.0) then + info = 4011 + call psb_errpush(info,name) + goto 9999 + end if + + else + ! Matrix is transposed + if((ja.ne.iy).or.(ia.ne.ix)) then + ! this case is not yet implemented + info = 3030 + call psb_errpush(info,name) + goto 9999 + end if + + if(desc_as%ovrlap_elem(1).ne.-1) then + info = 3070 + call psb_errpush(info,name) + goto 9999 + end if + + ! checking for vectors correctness + call psb_chkvect(m,ik,size(x),ix,jx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(n,ik,size(y),iy,jy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + call psb_errpush(info,name) + goto 9999 + end if + + xp => x(iix:lldx) + yp => x(iiy:lldy) + + if(doswap_.lt.0) y(nrow:ncol,1:ik)=0.d0 + + ! local Matrix-vector product + call dcsmm(itran,ncol,ik,nrow,alpha,a%pr,a%fida,& + & a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pl,& + & x(iix),lldx,beta,y(iiy),lldy,& + & iwork,liwork,info) + if(info.ne.0) then + info = 4010 + ch_err='dcsmm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(doswap_.gt.0)& + $ call psi_swaptran(ior(SWAP_SEND,SWAP_RECV),& + & done,yp,desc_a,iwork,info) +!!$ & call PSI_dSwapTran(ior(SWAP_SEND,SWAP_RECV),& +!!$ & ik,done,y(iiy,jjy),lldy,desc_a%matrix_data,& +!!$ & desc_a%halo_index),iwork,liwork,info + if(info.ne.0) then + info = 4010 + ch_err='PSI_dSwapTran' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + end if + + if(.not.present(work)) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dspmv diff --git a/src/psblas/psb_dspsm.f90 b/src/psblas/psb_dspsm.f90 new file mode 100644 index 00000000..c379efee --- /dev/null +++ b/src/psblas/psb_dspsm.f90 @@ -0,0 +1,536 @@ +! File: psb_dspsm.f90 +! +! Subroutine: psb_dspsm +! Performs one of the distributed matrix-vector operations +! +! sub( Y ) := alpha * Pr * A-1 * Pc *sub( X ) + beta * sub (Y ), or +! +! sub( Y ) := alpha * D * Pr * A-1 * Pc * sub( X ) + beta * sub (Y ), or +! +! sub( Y ) := alpha * Pr * A-1 * Pc * D * sub( X ) + beta * sub (Y ), or +! +! sub( Y ) := alpha * Pr * A-T * Pc * sub( X ) + beta * sub (Y ), or +! +! sub( Y ) := alpha * D * Pr * A-T * Pc * sub( X ) + beta * sub (Y ), or +! +! sub( Y ) := alpha * Pr * A-T * Pc * D * sub( X ) + beta * sub (Y ), or +! +! where : +! +! sub( X ) denotes X(1:M,JX:JX+K-1), +! +! sub( Y ) denotes Y(1:M,JY:JY+K-1). +! +! sub( X ) is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! Parameters: +! alpha - real. The scalar alpha. +! a - type(). The sparse matrix containing A. +! x - real,dimension(:,:). The input vector containing the entries of sub( X ). +! beta - real. The scalar beta. +! y - real,dimension(:,:). The input vector containing the entries of sub( Y ). +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! unitd - character(optional). Specify some type of operation with the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d - real,dimension(:)(optional). Matrix for diagonal scaling. +! k - integer(optional). The number of right-hand sides. +! jx - integer(optional). The column offset for sub( X ). If not present 1 is assumed. +! jy - integer(optional). The column offset for sub( Y ). If not present 1 is assumed. +! work - real,dimension(:)(optional). Working area. +! +subroutine psb_dspsm(alpha,a,x,beta,y,desc_a,info,& + & trans, unitd, choice, d, k, jx, jy, work) + + use psb_dspmat_type + use psb_serial_mod + use psb_descriptor_type + use psb_comm_mod + use psi_mod + use psb_error_mod + implicit none + + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.d0)), intent(in) :: x(:,:) + real(kind(1.d0)), intent(inout) :: y(:,:) + type (psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: d(:) + real(kind(1.d0)), intent(inout), optional :: work(:) + character, intent(in), optional :: trans, unitd + integer, intent(in), optional :: choice + integer, intent(in), optional :: k, jx, jy + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ia, ja, iia, jja, temp(2), lldx,lldy, lchoice + character :: lunitd + integer, parameter :: nb=4 + real(kind(1.d0)),pointer :: tmpx(:), xp(:,:), yp(:,:) + character(len=20) :: name, ch_err + + name='psb_dspsm' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ! just this case right now + ia = 1 + ja = 1 + + ix = 1 + if (present(jx)) then + ijx = jx + else + ijx = 1 + endif + + iy = 1 + if (present(jy)) then + ijy = jy + else + ijy = 1 + endif + + if (present(k)) then + ik = min(k,size(x,2)-ijx+1) + ik = min(ik,size(y,2)-ijy+1) + else + ik = min(size(x,2)-ijx+1,size(y,2)-ijy+1) + endif + + if (present(choice)) then + lchoice = choice + else + lchoice = AVG_ + endif + + if (present(unitd)) then + lunitd = unitd + else + lunitd = 'U' + endif + + if (present(trans)) then + if((trans.eq.'N').or.(trans.eq.'T')) then + itrans = trans + else if (trans.eq.'C') then + info = 3020 + call psb_errpush(info,name) + goto 9999 + else + info = 70 + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_data(m_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + lldx = size(x,1) + lldy = size(y,1) + + if((lldx.lt.ncol).or.(lldy.lt.ncol)) then + info=3010 + call psb_errpush(info,name) + goto 9999 + end if + + ! check for presence/size of a work area + liwork= 2*ncol + if (a%pr(1) /= 0) llwork = liwork + m * ik + if (a%pl(1) /= 0) llwork = llwork + m * ik + if (present(work)) then + if(size(work).lt.liwork) then + call psb_realloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psb_realloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork(1)=0.d0 + + if(present(d)) then + lld = size(d) + id => d + else + lld=1 + allocate(id(1)) + id=1.d0 + end if + + ! checking for matrix correctness + call psb_chkmat(m,m,ia,ja,desc_a%matrix_data,info,iia,jja) + ! checking for vectors correctness + call psb_chkvect(m,ik,size(x,1),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ik,size(y,1),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect/mat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(ja.ne.ix) then + ! this case is not yet implemented + info = 3030 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + end if + + if(info.ne.0) then + call psb_errpush(info,name) + goto 9999 + end if + + ! Perform local triangular system solve + call dcssm(itrans,nrow,ik,alpha,lunitd,id,a%pr,& + & a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,& + & a%pl,x(iix,jjx),lldx,beta,y(iiy,jjy),lldy,& + & iwork,liwork,info) + if(info.ne.0) then + info = 4010 + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if(lchoice.gt.0) then + yp => y(iiy:lldy,jjy:jjy+ik-1) + call psi_swapdata(ior(SWAP_SEND,SWAP_RECV),ik,& + & done,yp,desc_a,iwork,info) +!!$ call PSI_dSwapData(ior(SWAP_SEND,SWAP_RECV),ik,& +!!$ & done,y,lldy,desc_a%matrix_data,desc_a%ovrlap_index,& +!!$ & iwork,liwork,info) + + i=0 + ! switch on update type + select case (lchoice) + case(SQUARE_ROOT_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + y(desc_a%ovrlap_elem(i+ovrlp_elem_),:) =& + & y(desc_a%ovrlap_elem(i+ovrlp_elem_),:)/& + & sqrt(real(desc_a%ovrlap_elem(i+n_dom_ovr_))) + i = i+2 + end do + case(AVG_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + y(desc_a%ovrlap_elem(i+ovrlp_elem_),:) =& + & y(desc_a%ovrlap_elem(i+ovrlp_elem_),:)/& + & real(desc_a%ovrlap_elem(i+n_dom_ovr_)) + i = i+2 + end do + case(SUM_) + ! do nothing + case default + ! wrong value for choice argument + info = 70 + int_err=(/10,lchoice/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end select + end if + + if(.not.present(work)) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dspsm + +! Subroutine: psb_dspsmv +! Performs one of the distributed matrix-vector operations +! +! Y := alpha * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-1 * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-1 * Pc * D * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * D * Pr * A-T * Pc * X + beta * Y, or +! +! Y := alpha * Pr * A-T * Pc * D * X + beta * Y, or +! +! X is a distributed +! vector and T is a M-by-M distributed triangular matrix. +! +! Parameters: +! alpha - real. The scalar alpha. +! a - type(). The sparse matrix containing A. +! x - real,dimension(:). The input vector containing the entries of X. +! beta - real. The scalar beta. +! y - real,dimension(:). The input vector containing the entries of Y. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! trans - character(optional). Whether A or A'. If not present 'N' is assumed. +! unitd - character(optional). Specify some type of operation with the diagonal matrix D. +! choice - integer(optional). The kind of update to perform on overlap elements. +! d - real,dimension(:)(optional). Matrix for diagonal scaling. +! work - real,dimension(:)(optional). Working area. +! +subroutine psb_dspsv(alpha,a,x,beta,y,desc_a,info,& + & trans, unitd, choice, d, work) + use psb_dspmat_type + use psb_serial_mod + use psb_descriptor_type + use psb_comm_mod + use psi_mod + use psb_error_mod + + real(kind(1.D0)), intent(in) :: alpha, beta + real(kind(1.d0)), intent(in) :: x(:) + real(kind(1.d0)), intent(inout) :: y(:) + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + real(kind(1.d0)), intent(in), optional :: d(:) + real(kind(1.d0)), intent(inout), optional :: work(:) + character, intent(in), optional :: trans, unitd + integer, intent(in), optional :: choice + + ! locals + integer :: int_err(5), icontxt, nprow, npcol, me, mycol,& + & err_act, n, iix, jjx, ia, ja, iia, jja, temp(2), lldx,lldy, lchoice + character :: lunitd + integer, parameter :: nb=4 + real(kind(1.d0)),pointer :: tmpx(:), xp(:), yp(:) + character(len=20) :: name, ch_err + + name='psb_dspsv' + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + ! just this case right now + ia = 1 + ja = 1 + ix = 1 + iy = 1 + ik = 1 + + if (present(choice)) then + lchoice = choice + else + lchoice = AVG_ + endif + + if (present(unitd)) then + lunitd = unitd + else + lunitd = 'U' + endif + + if (present(trans)) then + if((trans.eq.'N').or.(trans.eq.'T')) then + itrans = trans + else if (trans.eq.'C') then + info = 3020 + call psb_errpush(info,name) + goto 9999 + else + info = 70 + call psb_errpush(info,name) + goto 9999 + end if + else + itrans = 'N' + endif + + m = desc_data(m_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + lldx = size(x) + lldy = size(y) + + if((lldx.lt.ncol).or.(lldy.lt.ncol)) then + info=3010 + call psb_errpush(info,name) + goto 9999 + end if + + ! check for presence/size of a work area + liwork= 2*ncol + if (a%pr(1) /= 0) llwork = liwork + m * ik + if (a%pl(1) /= 0) llwork = llwork + m * ik + if (present(work)) then + if(size(work).lt.liwork) then + call psb_realloc(liwork,work,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork => work + else + call psb_realloc(liwork,iwork,info) + if(info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + end if + iwork(1)=0.d0 + + if(present(d)) then + lld = size(d) + id => d + else + lld=1 + allocate(id(1)) + id=1.d0 + end if + + ! checking for matrix correctness + call psb_chkmat(m,m,ia,ja,desc_a%matrix_data,info,iia,jja) + ! checking for vectors correctness + call psb_chkvect(m,ik,size(x),ix,ijx,desc_data%matrix_data,info,iix,jjx) + call psb_chkvect(m,ik,size(y),iy,ijy,desc_data%matrix_data,info,iiy,jjy) + if(info.ne.0) then + info=4010 + ch_err='psb_chkvect/mat' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(ja.ne.ix) then + ! this case is not yet implemented + info = 3030 + end if + + if((iix.ne.1).or.(iiy.ne.1)) then + ! this case is not yet implemented + info = 3040 + end if + + if(info.ne.0) then + call psb_errpush(info,name) + goto 9999 + end if + + ! Perform local triangular system solve + call dcssm(itrans,nrow,ik,alpha,lunitd,id,a%pr,& + & a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,& + & a%pl,x,lldx,beta,y,lldy,& + & iwork,liwork,info) + if(info.ne.0) then + info = 4010 + ch_err='dcssm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! update overlap elements + if(lchoice.gt.0) then + yp => y(iiy:lldy) + call psi_swapdata(ior(SWAP_SEND,SWAP_RECV),& + & done,yp,desc_a,iwork,info) +!!$ call PSI_dSwapData(ior(SWAP_SEND,SWAP_RECV),ik,& +!!$ & done,y,lldy,desc_a%matrix_data,desc_a%ovrlap_index,& +!!$ & iwork,liwork,info) + + i=0 + ! switch on update type + select case (lchoice) + case(SQUARE_ROOT_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + y(desc_a%ovrlap_elem(i+ovrlp_elem_)) =& + & y(desc_a%ovrlap_elem(i+ovrlp_elem_))/& + & sqrt(real(desc_a%ovrlap_elem(i+n_dom_ovr_))) + i = i+2 + end do + case(AVG_) + do while(desc_a%ovrlap_elem(i).ne.-ione) + y(desc_a%ovrlap_elem(i+ovrlp_elem_)) =& + & y(desc_a%ovrlap_elem(i+ovrlp_elem_))/& + & real(desc_a%ovrlap_elem(i+n_dom_ovr_)) + i = i+2 + end do + case(SUM_) + ! do nothing + case default + ! wrong value for choice argument + info = 70 + int_err=(/10,lchoice/) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end select + end if + + if(.not.present(work)) deallocate(iwork) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return +end subroutine psb_dspsv + diff --git a/src/serial/Makefile b/src/serial/Makefile new file mode 100644 index 00000000..141d52fa --- /dev/null +++ b/src/serial/Makefile @@ -0,0 +1,30 @@ +include ../../Make.inc + +#FCOPT= $(FCOPT) +F90_PSDOBJS= dcsdp90.o dcssm90.o dcssm90v.o dfixcoo.o dipcoo2csr.o dipcsr2coo.o\ + dcsprt90.o dspgtdiag.o dspinfo.o dspgtrow.o dspscal.o imsr.o imsrx.o \ + dsymbmm90.o dnumbmm90.o drwextd.o dtransp90.o smmp.o dcsmm90.o dcsmv90.o\ + dcsrws90.o psdneigh.o psbdcoins.o string_impl.o dcsnmi90.o + + +LIBDIR= ../../lib +INCLUDES=-I$(LIBDIR) -I.. +LIBNAME=$(LIBDIR)/$(F90LIB) +HERE=. + +INCDIRS=-I. -I.. -I$(LIBDIR) + +lib: $(F90_PSDOBJS) + ar -cur $(LIBNAME) $(F90_PSDOBJS) + ranlib $(LIBNAME) + +#$(F90_PSDOBJS): $(MODS) + +.f.o: + $(F90) $(FCOPT) $(INCDIRS) -c $< + +veryclean: clean + /bin/rm -f $(LIBNAME) + +clean: + /bin/rm -f $(F90_PSDOBJS) $(LOCAL_MODS) diff --git a/src/serial/aux/Makefile b/src/serial/aux/Makefile new file mode 100644 index 00000000..357d9f00 --- /dev/null +++ b/src/serial/aux/Makefile @@ -0,0 +1,44 @@ +include ../../../../Make.inc +# +# The object files +# + +FOBJS = daxpby.o getrepflag.o geterr.o \ + isr.o isrx.o lsame.o \ + setrepflag.o seterr.o sperror.o \ + write_message.o mrgsrt.o xerbla.o \ + xsperr.o zaxpby.o zseterr.o \ + zsperror.o zwrite_message.o \ + zxsperr.o zsetrepflag.o isaperm.o ibsrch.o + + +OBJS=$(FOBJS) + +# +# Where the library should go, and how it is called. +# Note that we are regenerating most of libsparker.a on the fly. +#LIBDIR=../../../LIB +#LIBNAME=libsparker.a +LIBFILE=$(LIBDIR)/$(LIBNAME) +SPARKERDIR=.. +INCDIRS=-I. -I$(SPARKERDIR) -I$(LIBDIR) + +# +# No change should be needed below +# + +default: lib + +lib: $(OBJS) + $(AR) $(LIBFILE) $(OBJS) + $(RANLIB) $(LIBFILE) + +$(FOBJS): $(SPARKERDIR)/sparker.fh + +clean: cleanobjs + +veryclean: cleanobjs + +cleanobjs: + /bin/rm -f $(OBJS) + diff --git a/src/serial/aux/ibsrch.f b/src/serial/aux/ibsrch.f new file mode 100644 index 00000000..f8f0cc2f --- /dev/null +++ b/src/serial/aux/ibsrch.f @@ -0,0 +1,25 @@ + subroutine ibsrch(ipos,key,n,v) + integer ipos, key, n + integer v(n) + + integer lb, ub, m + + + lb = 1 + ub = n + ipos = -1 + + do while (lb.le.ub) + m = (lb+ub)/2 + if (key.eq.v(m)) then + ipos = m + lb = ub + 1 + else if (key.lt.v(m)) then + ub = m-1 + else + lb = m + 1 + end if + enddo + + return + end diff --git a/src/serial/aux/isaperm.f b/src/serial/aux/isaperm.f new file mode 100644 index 00000000..9f0b783d --- /dev/null +++ b/src/serial/aux/isaperm.f @@ -0,0 +1,69 @@ +*********************************************************************** +* * +* REFERENCES = * +* * +* [1] D. E. Knuth * +* The art of computer programming Vol. 1 * +* Sect. 1.3.3 * +* Addison-Wesley * +* * +* * +* FUNCTION: Checks whether a vector really is a permutation * +* works by exploiting the cycle structure of the * +* permutation. * +* * +* * +*********************************************************************** + LOGICAL FUNCTION ISAPERM(N,IP) +C .. Scalar Arguments .. + INTEGER N +C .. +C .. Array Arguments .. + INTEGER IP(N) +C .. +C .. Local Scalars .. + INTEGER I,J,M +C .. + + ISAPERM = .TRUE. +C +C Sanity check first +C + DO I=1, N + IF ((IP(I).LT.1).OR.(IP(I).GT.N)) THEN + ISAPERM = .FALSE. + RETURN + ENDIF + ENDDO + +C +C Now work through the cycles, by marking each successive item as negative. +C No cycle should intersect with any other, hence the .GE.1 check. +C + DO M = 1, N + I = IP(M) + IF (I.LT.0) THEN + IP(M) = -I + ELSE IF (I.NE.M) THEN + J = IP(I) + IP(I) = -J + I = J + DO WHILE ((J.GE.1).AND.(J.NE.M)) + J = IP(I) + IP(I) = -J + I = J + ENDDO + IP(M) = IABS(IP(M)) + IF (J.NE.M) THEN + ISAPERM = .FALSE. + DO I=1, N + IP(I) = IABS(IP(I)) + ENDDO + GOTO 9999 + ENDIF + END IF + ENDDO + 9999 CONTINUE + + RETURN + END diff --git a/src/serial/aux/isr.f b/src/serial/aux/isr.f new file mode 100644 index 00000000..df8d39a2 --- /dev/null +++ b/src/serial/aux/isr.f @@ -0,0 +1,147 @@ + SUBROUTINE ISR(N,X) +C +C Quicksort. +C Adapted from a number of sources, including Don Knuth's TAOCP. +C +C .. Scalar Arguments .. + INTEGER N +C .. +C .. Array Arguments .. + INTEGER X(N) +C .. +C .. Local Scalars .. + INTEGER I, J, XX, ILX, IUX, ISTP, PIV, LPIV + INTEGER IT1, N1, N2 + INTEGER MAXSTACK,NPARMS,ITHRS + PARAMETER (MAXSTACK=64,NPARMS=3,ITHRS=16) + INTEGER ISTACK(NPARMS,MAXSTACK) +C .. + +C +C Small inputs will only get through insertion sort. +C + IF (N.GT.ITHRS) THEN +C +C Init stack pointer +C + ISTP = 1 + ISTACK(1,ISTP) = 1 + ISTACK(2,ISTP) = N + + DO WHILE (ISTP.GT.0) + ILX = ISTACK(1,ISTP) + IUX = ISTACK(2,ISTP) + ISTP = ISTP - 1 +c$$$ write(0,*) 'Debug 1: ',ilx,iux +C +C Choose a pivot with median-of-three heuristics, leave it +C in the LPIV location +C + I = ILX + J = IUX + LPIV = (I+J)/2 + PIV = X(LPIV) + IF (PIV.LT.X(I)) THEN + IT1 = X(I) + X(I) = X(LPIV) + X(LPIV) = IT1 + PIV = X(LPIV) + ENDIF + IF (PIV.GT.X(J)) THEN + IT1 = X(J) + X(J) = X(LPIV) + X(LPIV) = IT1 + PIV = X(LPIV) + ENDIF + IF (PIV.LT.X(I)) THEN + IT1 = X(I) + X(I) = X(LPIV) + X(LPIV) = IT1 + PIV = X(LPIV) + ENDIF +C +C Now PIV is correct; place it into first location + + IT1 = X(I) + X(I) = X(LPIV) + X(LPIV) = IT1 + + I = ILX - 1 + J = IUX + 1 + + 130 CONTINUE + I = I + 1 + XK = X(I) + IF (XK.LT.PIV) GOTO 130 +C +C Ensure finite termination for next loop +C + IT1 = XK + X(I) = PIV + 140 CONTINUE + J = J - 1 + XK = X(J) + IF (XK.GT.PIV) GOTO 140 + X(I) = IT1 + 150 CONTINUE + + IF (J.GT.I) THEN + IT1 = X(I) + X(I) = X(J) + X(J) = IT1 + GO TO 130 + END IF + + if (i.eq.ilx) then + if (x(i).ne.piv) then + write(0,*) 'Should never ever get here????!!!!' + stop + endif + i = i + 1 + endif + + N1 = (I-1)-ILX+1 + N2 = IUX-(I)+1 + IF (N1.GT.N2) THEN + if (n1.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = ILX + ISTACK(2,ISTP) = I-1 + endif + if (n2.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = I + ISTACK(2,ISTP) = IUX + endif + ELSE + if (n2.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = I + ISTACK(2,ISTP) = IUX + endif + if (n1.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = ILX + ISTACK(2,ISTP) = I-1 + endif + ENDIF + ENDDO + ENDIF + + DO J=N-1,1,-1 + IF (X(J+1).LT.X(J)) THEN + XX = X(J) + I=J+1 + 100 CONTINUE + X(I-1) = X(I) + I = I+1 + IF ((I.LE.N)) then + if (X(I).LT.XX) GOTO 100 + endif + X(I-1) = XX + ENDIF + ENDDO + + RETURN + + END diff --git a/src/serial/aux/isrx.f b/src/serial/aux/isrx.f new file mode 100644 index 00000000..5b7fc84b --- /dev/null +++ b/src/serial/aux/isrx.f @@ -0,0 +1,168 @@ + SUBROUTINE ISRX(N,X,INDX) +C +C Quicksort with indices into original positions. +C Adapted from a number of sources, including Don Knuth's TAOCP. +C +C .. Scalar Arguments .. + INTEGER N +C .. +C .. Array Arguments .. + INTEGER INDX(N),X(N) +C .. +C .. Local Scalars .. + INTEGER I, J, II, XX, ILX, IUX, ISTP, PIV, LPIV + INTEGER IT1, IT2, N1, N2 + INTEGER MAXSTACK,NPARMS,ITHRS + PARAMETER (MAXSTACK=64,NPARMS=3,ITHRS=16) + INTEGER ISTACK(NPARMS,MAXSTACK) +C .. + + DO I=1, N + INDX(I) = I + ENDDO +C +C Small inputs will only get through insertion sort. +C + IF (N.GT.ITHRS) THEN +C +C Init stack pointer +C + ISTP = 1 + ISTACK(1,ISTP) = 1 + ISTACK(2,ISTP) = N + + DO WHILE (ISTP.GT.0) + ILX = ISTACK(1,ISTP) + IUX = ISTACK(2,ISTP) + ISTP = ISTP - 1 +C +C Choose a pivot with median-of-three heuristics, leave it +C in the LPIV location +C + I = ILX + J = IUX + LPIV = (I+J)/2 + PIV = X(LPIV) + IF (PIV.LT.X(I)) THEN + IT1 = X(I) + IT2 = INDX(I) + X(I) = X(LPIV) + INDX(I) = INDX(LPIV) + X(LPIV) = IT1 + INDX(LPIV) = IT2 + PIV = X(LPIV) + ENDIF + IF (PIV.GT.X(J)) THEN + IT1 = X(J) + IT2 = INDX(J) + X(J) = X(LPIV) + INDX(J) = INDX(LPIV) + X(LPIV) = IT1 + INDX(LPIV) = IT2 + PIV = X(LPIV) + ENDIF + IF (PIV.LT.X(I)) THEN + IT1 = X(I) + IT2 = INDX(I) + X(I) = X(LPIV) + INDX(I) = INDX(LPIV) + X(LPIV) = IT1 + INDX(LPIV) = IT2 + PIV = X(LPIV) + ENDIF +C +C Now PIV is correct; place it into first location +C + IT1 = X(I) + IT2 = INDX(I) + X(I) = X(LPIV) + INDX(I) = INDX(LPIV) + X(LPIV) = IT1 + INDX(LPIV) = IT2 + + I = ILX - 1 + J = IUX + 1 + + 130 CONTINUE + I = I + 1 + XK = X(I) + IF (XK.LT.PIV) GOTO 130 +C +C Ensure finite termination for next loop +C + IT1 = XK + X(I) = PIV + 140 CONTINUE + J = J - 1 + XK = X(J) + IF (XK.GT.PIV) GOTO 140 + X(I) = IT1 + 150 CONTINUE + + IF (J.GT.I) THEN + IT1 = X(I) + IT2 = INDX(I) + X(I) = X(J) + INDX(I) = INDX(J) + X(J) = IT1 + INDX(J) = IT2 + GO TO 130 + END IF + + if (i.eq.ilx) then + if (x(i).ne.piv) then + write(0,*) + + 'ISRX:: Should never ever get here????!!!!' + stop + endif + i = i + 1 + endif + + N1 = (I-1)-ILX+1 + N2 = IUX-(I)+1 + IF (N1.GT.N2) THEN + if (n1.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = ILX + ISTACK(2,ISTP) = I-1 + endif + if (n2.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = I + ISTACK(2,ISTP) = IUX + endif + ELSE + if (n2.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = I + ISTACK(2,ISTP) = IUX + endif + if (n1.gt.ithrs) then + ISTP = ISTP + 1 + ISTACK(1,ISTP) = ILX + ISTACK(2,ISTP) = I-1 + endif + ENDIF + ENDDO + ENDIF + + DO J=N-1,1,-1 + IF (X(J+1).LT.X(J)) THEN + XX = X(J) + II = INDX(J) + I=J+1 + 100 CONTINUE + X(I-1) = X(I) + INDX(I-1) = INDX(I) + I = I+1 + IF ((I.LE.N)) then + if (X(I).LT.XX) GOTO 100 + endif + X(I-1) = XX + INDX(I-1) = II + ENDIF + ENDDO + + RETURN + + END diff --git a/src/serial/aux/lsame.c b/src/serial/aux/lsame.c new file mode 100644 index 00000000..d34cdede --- /dev/null +++ b/src/serial/aux/lsame.c @@ -0,0 +1,27 @@ +#include +#define FTRUE 1 +#define FFALSE 0 + +#ifdef Add_ +#define lsame lsame_ +#endif + +#ifdef UpCase +#define lsame LSAME +#endif + +#ifdef NoChange +#define lsame lsame +#endif + + +int lsame(a,b,la,lb) +char *a, *b; +int la,lb; +{ + if ((tolower(*a))==(tolower(*b))) { + return(FTRUE); + } else { + return(FFALSE); + } +} diff --git a/src/serial/aux/mrgsrt.f b/src/serial/aux/mrgsrt.f new file mode 100644 index 00000000..06c7a541 --- /dev/null +++ b/src/serial/aux/mrgsrt.f @@ -0,0 +1,204 @@ +*********************************************************************** +* * +* FUNCTION = This subroutine returns an array of pointers, L, * +* to be used to sort the integer input vector K; * +* the routine implements a list merge-sort * +* * +*********************************************************************** +* * +* CALL MRGSRT(N,K,L,IRET) * +* * +* INPUT = * +* * +* SYMBOLIC NAME: N * +* POSITION: First parameter. * +* ATTRIBUTES: INTEGER * +* VALUES: >= 0 * +* DESCRIPTION: Dimension of the array to be sorted * +* * +* SYMBOLIC NAME: K * +* POSITION: Second parameter * +* ATTRIBUTES: INTEGER ARRAY(N) * +* VALUES: Any * +* DESCRIPTION: Input array containing the keys, i.e., values * +* to be sorted * +* * +* * +* * +* OUTPUT = * +* * +* SYMBOLIC NAME: L * +* POSITION: Third parameter * +* ATTRIBUTES: INTEGER ARRAY(N+2) * +* VALUES: >= 0 * +* DESCRIPTION: On exit, this array contains pointers to * +* the keys array. * +* * +*********************************************************************** +*********************************************************************** +* * +*********************************************************************** +*********************************************************************** +* ALGORITHM DESCRIPTION * +* * +* REFERENCES = (1) D. E. Knuth * +* The Art of Computer Programming, * +* vol.3: Sorting and Searching * +* Addison-Wesley, 1973 * +* * +* FUNCTION = This subroutine is based on the well-known merge-sort * +* algorithm; according to (1) we are sorting 'records' * +* R(I) with respect to keys K(I), and to this purpose * +* we use 'links' L(I); at the end of the subroutine, * +* L(0) is the index of the first record in the sorted * +* sequence, then for every record R(I), we have into * +* L(I) the index of the next one in the sequence. A * +* value L(I)=0 signals the end of the sequence. * +* The sorting is stable, i.e., if K(I)=K(J) and I= 0 * +* DESCRIPTION: Number of rows of the matrix op(A). * +* * +* SYMBOLIC NAME: N * +* POSITION: PARAMETER NO 4. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: N >= 0 * +* DESCRIPTION: Number of columns of the matrix op(A) * +* * +* SYMBOLIC NAME: ALPHA * +* POSITION: PARAMETER NO 5. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar alpha. * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: PARAMETER NO 6. * +* ATTRIBUTES: REAL*8: ARRAY(IA(M+1)-1) * +* VALUES: ANY * +* DESCRIPTION: Array containing the non zero coefficients of * +* the sparse matrix op(A). * +* * +* SYMBOLIC NAME: JA * +* POSITION: PARAMETER NO 7. * +* ATTRIBUTES: INTEGER*4: ARRAY(IA(M+1)-1) * +* VALUES: 0 < JA(I) <= M * +* DESCRIPTION: Array containing the column number of the * +* nonzero coefficients stored in array AS. * +* * +* SYMBOLIC NAME: IA * +* POSITION: PARAMETER NO 8. * +* ATTRIBUTES: INTEGER*4: ARRAY(*) * +* VALUES: IA(I) > 0 * +* DESCRIPTION: Contains the pointers for the beginning of * +* each rows. * +* * +* * +* SYMBOLIC NAME: X * +* POSITION: PARAMETER NO 9. * +* ATTRIBUTES: REAL*8 ARRAY(N) (or ARRAY(M) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* multiplied by the matrix A. * +* * +* SYMBOLIC NAME: BETA * +* POSITION: PARAMETER NO 10. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar beta. * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* updated by the matrix-vector multiplication. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: PARAMETER NO 12. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Work area available to the program. It is used * +* only when TRANS = 'T'. * +* * +* OUTPUT = * +* * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector * +* updated by the matrix-vector multiplication. * +* * +* * +*********************************************************************** + SUBROUTINE DCOOMV (TRANS,DIAG,M,N,ALPHA,AS,IA,JA,INFOA,X, + + BETA,Y,WORK,IERROR) +C .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER (ONE=1.0D0,ZERO=0.0D0) +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER M, N, IERROR + CHARACTER DIAG, TRANS +C .. Array Arguments .. + DOUBLE PRECISION AS(*), WORK(*), X(*), Y(*) + INTEGER IA(*), JA(*),infoa(*) +C .. Local Scalars .. + DOUBLE PRECISION ACC, TX + INTEGER I, J, K, NNZ, IR, JC + LOGICAL SYM, TRA, UNI +C .. Executable Statements .. +C + IERROR=0 + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + +C Symmetric matrix upper or lower + SYM = ((TRANS.EQ.'L').OR.(TRANS.EQ.'U')) +C + + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, M + Y(I) = ZERO + ENDDO + ELSE + DO 20 I = 1, M + Y(I) = BETA*Y(I) + 20 CONTINUE + ENDIF + RETURN + END IF + + NNZ = INFOA(1) +C + IF (SYM) THEN + IF (UNI) THEN +C +C ......Symmetric with unitary diagonal....... +C ....OK!! +C To be optimized + + IF (BETA.NE.ZERO) THEN + DO I = 1, M +C +C Product for diagonal elements +C + Y(I) = BETA*Y(I) + ALPHA*X(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ALPHA*X(I) + ENDDO + ENDIF + +C Product for other elements + + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + + DO WHILE ((IA(J).EQ.IA(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + + ACC = ZERO + IR = IA(I) + TX = X(IR) + DO K = I, J-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + Y(JC) = Y(JC) + ALPHA * AS(K)*TX + ENDDO + Y(IR) = Y(IR) + ALPHA * ACC + I = J + ENDDO +C + ELSE IF ( .NOT. UNI) THEN +C +C Check if matrix is lower or upper +C + IF (TRANS.EQ.'L') THEN +C +C LOWER CASE: diagonal element is the last element of row +C ....OK! + + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA(J).EQ.IA(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + ACC = ZERO + IR = IA(I) + TX = X(IR) + DO K = I, J-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + IF (IR.NE.JC) THEN + Y(JC) = Y(JC) + ALPHA * AS(K)*TX + ENDIF + ENDDO + Y(IR) = Y(IR) + ALPHA * ACC + I = J + ENDDO + + + ELSE ! ....Trans<>L +C +C UPPER CASE +C ....OK!! (Actually it's just the same as above!) +C + + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA(J).EQ.IA(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + ACC = ZERO + IR = IA(I) + TX = X(IR) + DO K = I, J-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + IF (IR.NE.JC) THEN + Y(JC) = Y(JC) + ALPHA * AS(K)*TX + ENDIF + ENDDO + Y(IR) = Y(IR) + ALPHA * ACC + I = J + ENDDO + + END IF ! ......TRANS=='L' + + END IF ! ......Not UNI +C + ELSE IF ( .NOT. TRA) THEN !......NOT SYM + + IF ( .NOT. UNI) THEN +C +C .......General Not Unit, No Traspose +C + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF + + I = 1 + J = I + IF (nnz > 0) then + IR = IA(1) + ACC = zero + DO + if (i>nnz) then + Y(IR) = Y(IR) + ALPHA * ACC + exit + endif + IF (IA(I) /= IR) THEN + Y(IR) = Y(IR) + ALPHA * ACC + IR = IA(I) + ACC = ZERO + ENDIF + ACC = ACC + AS(I) * X(JA(I)) + I = I + 1 + ENDDO + endif +C + ELSE IF (UNI) THEN +C + + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I)+ALPHA*X(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ALPHA*X(I) + ENDDO + ENDIF + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA(J).EQ.IA(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + ACC = ZERO + IR = IA(I) + DO K = I, J-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + ENDDO + Y(IR) = Y(IR) + ALPHA * ACC + I = J + ENDDO + + END IF !....End Testing on UNI +C + ELSE IF (TRA) THEN !....Else on SYM (swapped M and N) +C + IF ( .NOT. UNI) THEN +C + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF +C + ELSE IF (UNI) THEN +C + + IF (BETA.NE.ZERO) THEN + DO I = 1, M + Y(I) = BETA*Y(I)+ALPHA*X(I) + ENDDO + ELSE + DO I = 1, M + Y(I) = ALPHA*X(I) + ENDDO + ENDIF +C + END IF !....UNI +C + IF (ALPHA.EQ.ONE) THEN +C + I = 1 + DO I=1,NNZ + IR = JA(I) + JC = IA(I) + Y(IR) = Y(IR) + AS(I)*X(JC) + ENDDO +C + ELSE IF (ALPHA.EQ.-ONE) THEN +C + + DO I=1,NNZ + IR = JA(I) + JC = IA(I) + Y(IR) = Y(IR) - AS(I)*X(JC) + ENDDO +C + ELSE !.....Else on TRA + + DO I=1,M + WORK(I) = ALPHA*X(I) + ENDDO + + DO I=1,NNZ + IR = JA(I) + JC = IA(I) + Y(IR) = Y(IR) + AS(I)*WORK(JC) + ENDDO + + END IF !.....End testing on ALPHA + + END IF !.....End testing on SYM +C + RETURN +C +C END OF DSRMV +C + END + diff --git a/src/serial/coo/dcoonrmi.f b/src/serial/coo/dcoonrmi.f new file mode 100644 index 00000000..223cbe63 --- /dev/null +++ b/src/serial/coo/dcoonrmi.f @@ -0,0 +1,39 @@ +C ... Compute Infinity norm for sparse matrix in CSR Format ... + DOUBLE PRECISION FUNCTION DCOONRMI(TRANS,M,N,DESCRA,A,IA1,IA2, + + INFOA,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*) + CHARACTER DESCRA*11 + DOUBLE PRECISION A(*) +C .. Local scalars .. + INTEGER I, J, K, NNZ + DOUBLE PRECISION NRMI, SUM + + IERROR=0 + NRMI = 0.0 + NNZ = INFOA(1) + I = 1 + J = I + DO WHILE (I.LE.NNZ) + + DO WHILE ((IA1(J).EQ.IA1(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + + SUM = 0.0 + DO K = I, J-1 + SUM = SUM + ABS(A(K)) + ENDDO + IF (SUM.GT.NRMI) THEN + NRMI = SUM + ENDIF + I = J + ENDDO + + DCOONRMI = NRMI + END diff --git a/src/serial/coo/dcooprt.f b/src/serial/coo/dcooprt.f new file mode 100644 index 00000000..509991be --- /dev/null +++ b/src/serial/coo/dcooprt.f @@ -0,0 +1,50 @@ +c +c What if a wrong DESCRA is passed? +c +c + SUBROUTINE DCOOPRT(M,N,DESCRA,AR,IA,JA,INFOA,TITLE,IOUT) +C +C +C .. Scalar Arguments .. + INTEGER M, N, IOUT +C .. Array Arguments .. + DOUBLE PRECISION AR(*) + INTEGER IA(*), JA(*),INFOA(*) + CHARACTER DESCRA*11, TITLE*(*) +C .. Local Scalars .. + INTEGER J + + +C .. External Subroutines .. +C +C + if ((descra(1:1).eq.'g').or.(descra(1:1).eq.'G')) then + write(iout,fmt=998) + else if ((descra(1:1).eq.'s').or.(descra(1:1).eq.'S')) then + write(iout,fmt=997) + else + write(iout,fmt=998) + endif + nnzero = infoa(1) + write(iout,fmt=992) + write(iout,fmt=996) + write(iout,fmt=996) title + write(iout,fmt=995) 'Number of rows: ',m + write(iout,fmt=995) 'Number of columns: ',n + write(iout,fmt=995) 'Nonzero entries: ',nnzero + write(iout,fmt=996) + write(iout,fmt=992) + write(iout,*) m,n,nnzero + 998 format('%%MatrixMarket matrix coordinate real general') + 997 format('%%MatrixMarket matrix coordinate real symmetric') + 992 format('%======================================== ') + 996 format('% ',a) + 995 format('% ',a,i9,a,i9,a,i9) + do j=1,nnzero + write(iout,fmt=994) ia(j),ja(j),ar(j) + 994 format(i6,1x,i6,1x,e16.8) + enddo + + + RETURN + END diff --git a/src/serial/coo/dcoosm.f b/src/serial/coo/dcoosm.f new file mode 100644 index 00000000..927f5682 --- /dev/null +++ b/src/serial/coo/dcoosm.f @@ -0,0 +1,82 @@ + SUBROUTINE DCOOSM(TRANST,M,N,UNITD,D,ALPHA,DESCRA,A,IA,JA,INFOA, + * B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + IMPLICIT NONE + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) + DOUBLE PRECISION ALPHA, BETA + INTEGER LDB, LDC, LWORK, M, N, IERROR + CHARACTER UNITD, TRANST + DOUBLE PRECISION A(*), B(LDB,*), C(LDC,*), D(*), WORK(*) + INTEGER IA(*), JA(*), INFOA(*), INT_VAL(5) + CHARACTER DESCRA*11 + INTEGER I, K, ERR_ACT + CHARACTER DIAG, UPLO + EXTERNAL XERBLA + INTRINSIC DBLE, IDINT + CHARACTER*20 NAME + + NAME = 'DCOOSM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF((ALPHA.NE.1.D0) .OR. (BETA.NE.0.D0))then + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + if (debug) write(*,*) 'DCOOSM ',m + if (debug) write(*,*) 'DCOOSM ',m,ierror + + UPLO = '?' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') UPLO = 'U' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') UPLO = 'L' + IF (UPLO.EQ.'?') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + IF (DESCRA(3:3).EQ.'N') DIAG = 'N' + IF (DESCRA(3:3).EQ.'U') DIAG = 'U' + IF(UNITD.EQ.'B') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + IF (UNITD.EQ.'R') THEN + DO 40 I = 1, N + DO 20 K = 1, M + B(K,I) = B(K,I)*D(K) + 20 CONTINUE + 40 CONTINUE + END IF + + DO 60 I = 1, N + CALL DCOOSV(UPLO,TRANST,DIAG,M,A,IA,JA,INFOA, + + B(1,I),C(1,I),IERROR) + 60 CONTINUE + IF(IERROR.NE.0) THEN + INT_VAL(1)=IERROR + CALL FCPSB_ERRPUSH(4012,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (UNITD.EQ.'L') THEN + DO 45 I = 1, N + DO 25 K = 1, M + C(K,I) = C(K,I)*D(K) + 25 CONTINUE + 45 CONTINUE + END IF + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/coo/dcoosv.f b/src/serial/coo/dcoosv.f new file mode 100644 index 00000000..b8b345f4 --- /dev/null +++ b/src/serial/coo/dcoosv.f @@ -0,0 +1,186 @@ +C +C Assumption: the triangular matrix has the diagonal element in the +C "right" place, i.e. the last in its row for Lower and the first +C for Upper. +C + SUBROUTINE DCOOSV (UPLO,TRANS,DIAG,N,AS,IA,JA,INFOA,B,X,IERROR) + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D0) + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) + INTEGER N,IERROR + CHARACTER DIAG, TRANS, UPLO + DOUBLE PRECISION AS(*), B(*), X(*) + INTEGER IA(*), JA(*),INFOA(*) + DOUBLE PRECISION ACC + INTEGER I, J, K, NNZ, II + LOGICAL LOW, TRA, UNI + if (debug) write(*,*) 'DCOOSV ',n + if (debug) write(*,*) 'DCOOSV ',n,nnz,diag,trans,uplo + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + LOW = (UPLO.EQ.'L') + NNZ = INFOA(1) + if (debug) write(*,*) 'DCOOSV ',n,nnz,uni,tra,low,ia(1),ja(1) + IERROR = 0 + if (debug) write(*,*) 'DCOOSV ierror ',ierror + IF ( .NOT. TRA) THEN + if (debug) write(*,*) 'DCOOSV NOT TRA' + IF (LOW) THEN + if (debug) write(*,*) 'DCOOSV LOW' + IF ( .NOT. UNI) THEN + if (debug) write(*,*) 'DCOOSV NOT UNI' + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((J.LE.NNZ).AND.(IA(J).EQ.IA(I))) + J = J+1 + ENDDO + ACC = ZERO + IR = IA(I) + DO K = I, J-2 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + ENDDO + X(IR) = (B(IR)-ACC)/AS(J-1) + I = J + ENDDO + + ELSE IF (UNI) THEN +C +C Bug warning: if UNI, some rows might be empty +C + I = 1 + if (debug) write(*,*) 'DCOOSV UNILOW ', + + i,n,nnz,uni,tra,low + DO II = 1, N + if (debug) write(*,*) 'Loop1 COOSV',i,j,ii,x(ii),b(ii) + DO WHILE ((I.LE.NNZ).AND.(IA(I).LT.II)) + I = I + 1 +c$$$ if (debug) write(*,*) 'Loop2 COOSV',i,ia(i),ii + ENDDO + ACC = ZERO + IF ((I.LE.NNZ).AND.(IA(I).EQ.II)) THEN + J = I + 1 + DO WHILE ((J.LE.NNZ).AND.(IA(J).EQ.IA(I))) + J = J+1 + ENDDO + DO K = I, J-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + ENDDO + ELSE + J = I + ENDIF + X(II) = (B(II)-ACC) + if (debug) write(*,*) 'Loop COOSV',i,j,ii,x(ii),b(ii) + I = J + ENDDO + + END IF + + ELSE IF ( .NOT. LOW) THEN + if (debug) write(*,*) 'DCOOSV NOT LOW' + IF ( .NOT. UNI) THEN + if (debug) write(*,*) 'DCOOSV NOT UNI' + I = NNZ + J = NNZ + DO WHILE (I.GT.0) + DO WHILE ((J.GT.0).AND.(IA(J).EQ.IA(I))) + J = J-1 + ENDDO + ACC = ZERO + IR = IA(I) + DO K = I, J+2,-1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + ENDDO + X(IR) = (B(IR)-ACC)/AS(J+1) + I = J + ENDDO + + ELSE IF (UNI) THEN + if (debug) write(*,*) 'DCOOSV UNI' + I = NNZ + DO II = N,1,-1 + DO WHILE ((I.GT.0).AND.(IA(I).GT.II)) + I = I -1 + ENDDO + ACC = ZERO + IF ((I.GT.0).AND.(IA(I).EQ.II)) THEN + J = I - 1 + DO WHILE ((J.GT.0).AND.(IA(J).EQ.IA(I))) + J = J-1 + ENDDO + DO K = I, J+1, -1 + JC = JA(K) + ACC = ACC + AS(K)*X(JC) + ENDDO + ELSE + J = I + ENDIF + X(II) = (B(II)-ACC) + if (debug) write(*,*) 'Loop COOSV',i,j,ii,x(ii),b(ii) + I = J + ENDDO + + END IF + + END IF + + ELSE IF (TRA) THEN + IERROR = 3010 + return +CCCCCCCCCCCCCCCC +C +C TBF +C +CCCCCCCCCCCCCCCC + DO 180 I = 1, N + X(I) = B(I) + 180 CONTINUE + IF (LOW) THEN + IF ( .NOT. UNI) THEN + DO 220 I = N, 1, -1 + X(I) = X(I)/AS(IA(I+1)-1) + ACC = X(I) + DO 200 J = IA(I), IA(I+1) - 2 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 200 CONTINUE + 220 CONTINUE + ELSE IF (UNI) THEN + DO 260 I = N, 1, -1 + ACC = X(I) + DO 240 J = IA(I), IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 240 CONTINUE + 260 CONTINUE + END IF + ELSE IF ( .NOT. LOW) THEN + IF ( .NOT. UNI) THEN + DO 300 I = 1, N + X(I) = X(I)/AS(IA(I)) + ACC = X(I) + DO 280 J = IA(I) + 1, IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 280 CONTINUE + 300 CONTINUE + ELSE IF (UNI) THEN + DO 340 I = 1, N + ACC = X(I) + DO 320 J = IA(I), IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 320 CONTINUE + 340 CONTINUE + END IF + END IF + END IF + RETURN + END + + + diff --git a/src/serial/coo/dcoozero.f b/src/serial/coo/dcoozero.f new file mode 100644 index 00000000..33512867 --- /dev/null +++ b/src/serial/coo/dcoozero.f @@ -0,0 +1,46 @@ + SUBROUTINE DCOOZERO(M,N,DESCRA,A,IA1,IA2, + + INFOA,IA,JA,MZ,NZ,IERROR) +C +C This subroutione performs the operation: +C +C A(IA : IA + MZ - 1, JA : JA + NZ - 1) = 0 +C +C This isn't accomplished by removing elements +C from sparse matrix representation, but assigning them +C the zero value. +C Columns are supposed to be ordered +C into the same row. This subroutine will +C not work properly otherwise. +C + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N,IA,JA,MZ,NZ,IERROR +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*) + CHARACTER DESCRA*11 + DOUBLE PRECISION A(*) +C .. Local scalars .. + INTEGER I, J, JBEGIN, JEND, AUX, NNZ + DOUBLE PRECISION + + IERROR=0 + IF (((JA + NZ - 1) .GT. N) .OR. + + ((IA + MZ - 1) .GT. M) .OR. + + (IA .LT. 1) .OR. (JA .LT. 1)) THEN + IERROR = 1 + GOTO 9999 + ENDIF + NNZ = INFOA(1) + I = 1 + DO WHILE ((IA1(I).LT.IA).AND.(I.LE.NNZ)) + I = I + 1 + ENDDO + DO WHILE ((IA1(I).LE.(IA+MZ-1)).AND.(I.LE.NNZ)) + IF ((JA.LE.IA2(I)).AND.(IA2(I).LE.(JA+NZ-1))) THEN + A(I) = 0.0D0 + ENDIF + I = I + 1 + ENDDO + + RETURN + END diff --git a/src/serial/coo/dcrupdate.f b/src/serial/coo/dcrupdate.f new file mode 100644 index 00000000..bc337be0 --- /dev/null +++ b/src/serial/coo/dcrupdate.f @@ -0,0 +1,104 @@ + SUBROUTINE DCRUPDATE(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) +C +C .. Matrix A to be updated is required to be stored with +C .. column indices belonging to the same row ordered. +C .. Block H to be inserted don't need to be stored in such way. +C +C Flag = 0: put elements to 0.0D0; +C Flag = 1: replace elements with new value; +C Flag = 2: sum block value to elements; +C + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER IA, JA, IH, JH, M, N, + + IERROR, FLAG, LIWORK +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),IH1(*),IH2(*), + + INFOA(*),INFOH(*),IWORK(*), + + GLOB_TO_LOC(*) + CHARACTER DESCRA*11,DESCRH*11 + DOUBLE PRECISION A(*),H(*) +C .. Local scalars .. + INTEGER I, J, K, XBLCK, XMATR, + + AUX, AUX1, AUX2, AUX3, + + LOC_COLUMN, LOC_POINTER, SHIFT1, + + SHIFT2 +C .. Local arrays .. + IERROR = 0 + + DO I = 1, M + XBLCK = IH + I - 1 + XMATR = IA + I - 1 + SHIFT1 = IA2(XMATR + 1) - IA2(XMATR) + SHIFT2 = 2 * SHIFT1 +C If columns are already sorted, return point is 100 +C CALL MRGSRT(IA2(XMATR + 1) - IA2(XMATR), +C + IA1(IA2(XMATR)), +C + IWORK(SHIFT2 + 1), +C + *100) + GOTO 100 + K = IWORK(SHIFT2 + 1) +C If columns have been sorted by mrgsrt + DO J = 1, IA2(XMATR + 1) - IA2(XMATR) + IWORK(J) = IA1(IA2(XMATR) - 1 + K) + IWORK(SHIFT1 + J) = IA2(XMATR) - 1 + K + K = IWORK(SHIFT2 + 1 + K) + ENDDO + GOTO 101 +C Else + 100 CONTINUE + DO J = IA2(XMATR), IA2(XMATR + 1) - 1 + AUX = J - IA2(XMATR) + 1 + IWORK(AUX) = IA1(J) + IWORK(SHIFT1 + AUX) = J + ENDDO +C End If +C Now IWORK(1: .. ) contains ordered column indices +C and IWORK(SHIFT1 + 1: .. ) contains position of those +C indices in the stored matrix data structures. + 101 CONTINUE + DO J = IH2(XBLCK), IH2(XBLCK + 1) - 1 + IF ((JH .LE. IH1(J)) .AND. + + (IH1(J) .LE. (JH + N - 1))) THEN + LOC_COLUMN = GLOB_TO_LOC(JA - JH + IH1(J)) +C Binary search + AUX1 = 1 + AUX2 = IA2(XMATR + 1) - IA2(XMATR) + DO K = 1, IA2(XMATR + 1) - IA2(XMATR) + AUX = (AUX1 + AUX2) / 2 + IF (LOC_COLUMN .GT. IWORK(AUX)) THEN + AUX1 = AUX + 1 + ELSE + AUX2 = AUX - 1 + ENDIF + IF ((LOC_COLUMN .EQ. IWORK(AUX)) .OR. + + (AUX1 .GT. AUX2)) + + EXIT + ENDDO + IF (LOC_COLUMN .EQ. IWORK(AUX)) THEN + LOC_POINTER = IWORK(SHIFT1 + AUX) + ELSE + IERROR = 1 + GOTO 9999 + ENDIF + IF (FLAG .EQ. 0) THEN + A(LOC_POINTER) = 0.0D0 + ELSE IF (FLAG .EQ. 1) THEN + A(LOC_POINTER) = H(J) + ELSE IF (FLAG .EQ. 2) THEN + A(LOC_POINTER) = A(LOC_POINTER) + H(J) + ELSE + IERROR = 1 + ENDIF + ENDIF + ENDDO + ENDDO + 9999 RETURN + END + + + + diff --git a/src/serial/csr/Makefile b/src/serial/csr/Makefile new file mode 100644 index 00000000..ba89924a --- /dev/null +++ b/src/serial/csr/Makefile @@ -0,0 +1,43 @@ +include ../../../../Make.inc + +# +# The object files +# + +FOBJS = dcsrck.o dcsrmm.o dcsrsm.o dcsrmv.o dcsrsv.o dcrnrmi.o \ + dcrcrupd.o dcocrupd.o dcsrprt.o dcsrmv4.o dcsrmv2.o dcsrmv3.o\ + zcsrck.o zcrnrmi.o zcsrmm.o zsrmv.o zcsrsm.o zsrsv.o \ + zcrcrupd.o zcocrupd.o zcsrprt.o + +OBJS=$(FOBJS) + +# +# Where the library should go, and how it is called. +# Note that we are regenerating most of libsparker.a on the fly. +#LIBDIR=../../LIB +#LIBNAME=libsparker.a +LIBFILE=$(LIBDIR)/$(LIBNAME) +SPARKERDIR=.. +INCDIRS=-I. -I$(SPARKERDIR) -I$(LIBDIR) + +# +# No change should be needed below +# + + +default: lib + +lib: $(OBJS) + $(AR) $(LIBFILE) $(OBJS) + $(RANLIB) $(LIBFILE) + +$(OBJS): $(SPARKERDIR)/sparker.fh + + +clean: cleanobjs + +veryclean: cleanobjs + +cleanobjs: + /bin/rm -f $(OBJS) + diff --git a/src/serial/csr/dcocrupd.f b/src/serial/csr/dcocrupd.f new file mode 100644 index 00000000..0851f4fb --- /dev/null +++ b/src/serial/csr/dcocrupd.f @@ -0,0 +1,51 @@ + SUBROUTINE DCOCRUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) +C +C .. Matrix A to be updated is required to be stored with +C .. column indices belonging to the same row ordered. +C .. Block H to be inserted don't need to be stored in such a way. +C +C Flag = 0: put elements to 0.0D0; +C Flag = 1: replace elements with new value; +C Flag = 2: sum block value to elements; +C + IMPLICIT NONE + include 'sparker.fh' +C .. Scalar Arguments .. + INTEGER IA, JA, IH, JH, M, N, + + IERROR, FLAG, LIWORK +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),IH1(*),IH2(*), + + INFOA(*),INFOH(*),IWORK(*), + + GLOB_TO_LOC(*) + CHARACTER DESCRA*11,DESCRH*11 + DOUBLE PRECISION A(*),H(*) +C .. Local scalars .. + INTEGER J, NNZ, IP1, NNZI +C .. Local arrays .. + IERROR = 0 +c$$$ write(0,*) 'dcocrupd ',infoa(upd_),ibits(infoa(upd_),2,1) + IF (IBITS(INFOA(UPD_),2,1).EQ.1) THEN +C +C Smart update capability +C + IP1 = INFOA(UPD_PNT_) + NNZ = IA2(IP1+NNZ_) + NNZI = INFOH(1) + DO J = 1, NNZI + NNZ = NNZ + 1 + A(NNZ) = H(J) + ENDDO + IA2(IP1+NNZ_) = NNZ + ELSE + IERROR = 2 + ENDIF + 9999 CONTINUE + RETURN + END + + + + diff --git a/src/serial/csr/dcrcrupd.f b/src/serial/csr/dcrcrupd.f new file mode 100644 index 00000000..9341f4d6 --- /dev/null +++ b/src/serial/csr/dcrcrupd.f @@ -0,0 +1,152 @@ + SUBROUTINE DCRCRUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) +C +C .. Matrix A to be updated is required to be stored with +C .. column indices belonging to the same row ordered. +C .. Block H to be inserted don't need to be stored in such a way. +C +C Flag = 0: put elements to 0.0D0; +C Flag = 1: replace elements with new value; +C Flag = 2: sum block value to elements; +C + IMPLICIT NONE + include 'sparker.fh' +C .. Scalar Arguments .. + INTEGER IA, JA, IH, JH, M, N, + + IERROR, FLAG, LIWORK +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),IH1(*),IH2(*), + + INFOA(*),INFOH(*),IWORK(*), + + GLOB_TO_LOC(*) + CHARACTER DESCRA*11,DESCRH*11 + DOUBLE PRECISION A(*),H(*) +C .. Local scalars .. + INTEGER I, J, XBLCK, XMATR, + + NRC, IPH, JPH, JPA, LPA, IRET, LNK, NNZ, IP1 +C .. Local arrays .. + IERROR = 0 +c$$$ write(0,*) 'dcrcrupd ',infoa(upd_),ibits(infoa(upd_),2,1) + IF (IBITS(INFOA(UPD_),2,1).EQ.1) THEN +C +C Smart update capability +C + IP1 = INFOA(UPD_PNT_) + NNZ = IA2(IP1+NNZ_) + DO I = 1, M + XBLCK = IH + I - 1 + DO J = IH2(XBLCK),IH2(XBLCK+1) - 1 + NNZ = NNZ + 1 + A(NNZ) = H(J) + ENDDO + ENDDO + IA2(IP1+NNZ_) = NNZ + ELSE + IF (FLAG.EQ.0) THEN + DO I = 1, M + XBLCK = IH + I - 1 + XMATR = IA + I - 1 + NRC = IH2(XBLCK+1) - IH2(XBLCK) + IPH = IH2(XBLCK) + IF (LIWORK.LT.2*NRC+2) THEN + IERROR = 10 + RETURN + ENDIF + DO J = 1, NRC + IWORK(J) = GLOB_TO_LOC(JA - JH + IH1(IPH+J-1)) + ENDDO + CALL MRGSRT(NRC,IWORK(1),IWORK(NRC+1),IRET) + + JPA = IA2(XMATR) + LPA = IA2(XMATR+1) + LNK = IWORK(NRC+1) + DO J = 1, NRC + JPH = IWORK(LNK) + DO WHILE ((IA1(JPA).NE.JPH).AND.(JPA.LT.LPA)) + JPA = JPA + 1 + ENDDO + IF (IA1(JPA).EQ.JPH) THEN + A(JPA) = 0.0D0 + LNK = IWORK(NRC+1+LNK) + ELSE + IERROR = 1 + GOTO 9999 + ENDIF + enddo + ENDDO + ELSE IF (FLAG.EQ.1) THEN + DO I = 1, M + XBLCK = IH + I - 1 + XMATR = IA + I - 1 + NRC = IH2(XBLCK+1) - IH2(XBLCK) + IPH = IH2(XBLCK) + IF (LIWORK.LT.2*NRC+2) THEN + IERROR = 10 + RETURN + ENDIF + DO J = 1, NRC + IWORK(J) = GLOB_TO_LOC(JA - JH + IH1(IPH+J-1)) + ENDDO + CALL MRGSRT(NRC,IWORK(1),IWORK(NRC+1),IRET) + + JPA = IA2(XMATR) + LPA = IA2(XMATR+1) + LNK = IWORK(NRC+1) + DO J = 1, NRC + JPH = IWORK(LNK) + DO WHILE((IA1(JPA).NE.JPH).AND.(JPA.LT.LPA)) + JPA = JPA + 1 + ENDDO + IF (IA1(JPA).EQ.JPH) THEN + A(JPA) = H(IPH+LNK-1) + LNK = IWORK(NRC+1+LNK) + ELSE + IERROR = 1 + GOTO 9999 + ENDIF + ENDDO + enddo + ELSE IF (FLAG.EQ.2) THEN + DO I = 1, M + XBLCK = IH + I - 1 + XMATR = IA + I - 1 + NRC = IH2(XBLCK+1) - IH2(XBLCK) + IPH = IH2(XBLCK) + IF (LIWORK.LT.2*NRC+2) THEN + IERROR = 10 + RETURN + ENDIF + DO J = 1, NRC + IWORK(J) = GLOB_TO_LOC(JA - JH + IH1(IPH+J-1)) + ENDDO + CALL MRGSRT(NRC,IWORK(1),IWORK(NRC+1),IRET) + + JPA = IA2(XMATR) + LPA = IA2(XMATR+1) + LNK = IWORK(NRC+1) + DO J = 1, NRC + JPH = IWORK(LNK) + DO WHILE((IA1(JPA).NE.JPH).AND.(JPA.LT.LPA)) + JPA = JPA + 1 + ENDDO + IF (IA1(JPA).EQ.JPH) THEN + A(JPA) = A(JPA) + H(IPH+LNK-1) + LNK = IWORK(NRC+1+LNK) + ELSE + IERROR = 1 + GOTO 9999 + ENDIF + ENDDO + enddo + ELSE + IERROR = 2 + ENDIF + ENDIF + 9999 CONTINUE + RETURN + END + + + + diff --git a/src/serial/csr/dcrnrmi.f b/src/serial/csr/dcrnrmi.f new file mode 100644 index 00000000..9ff1b0c0 --- /dev/null +++ b/src/serial/csr/dcrnrmi.f @@ -0,0 +1,30 @@ +C ... Compute Infinity norm for sparse matrix in CSR Format ... + DOUBLE PRECISION FUNCTION DCRNRMI(TRANS,M,N,DESCRA,A,IA1,IA2, + + INFOA,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*) + CHARACTER DESCRA*11 + DOUBLE PRECISION A(*) +C .. Local scalars .. + INTEGER I, J + DOUBLE PRECISION NRMI, SUM + + IERROR=0 + NRMI = 0.0 + DO I = 1, M + SUM = 0.0 + DO J = IA2(I), IA2(I+1)-1 + SUM = SUM + ABS(A(J)) + ENDDO + + IF (SUM.GT.NRMI) THEN + NRMI = SUM + ENDIF + ENDDO + + DCRNRMI = NRMI + END diff --git a/src/serial/csr/dcrzero.f b/src/serial/csr/dcrzero.f new file mode 100644 index 00000000..a2c700ab --- /dev/null +++ b/src/serial/csr/dcrzero.f @@ -0,0 +1,55 @@ + SUBROUTINE DCRZERO(M,N,DESCRA,A,IA1,IA2, + + INFOA,IA,JA,MZ,NZ,IERROR) +C +C This subroutione performs the operation: +C +C A(IA : IA + MZ - 1, JA : JA + NZ - 1) = 0 +C +C This isn't accomplished by removing elements +C from sparse matrix representation, but assigning them +C the zero value. +C Columns are supposed to be ordered +C into the same row. This subroutine will +C not work properly if this hypotesis is not +C verified. +C + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N,IA,JA,MZ,NZ,IERROR +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*) + CHARACTER DESCRA*11 + DOUBLE PRECISION A(*) +C .. Local scalars .. + INTEGER I, J, JBEGIN, JEND, AUX + DOUBLE PRECISION + + IERROR=0 + IF (((JA + NZ - 1) .GT. N) .OR. + + ((IA + MZ - 1) .GT. M) .OR. + + (IA .LT. 1) .OR. (JA .LT. 1)) THEN + IERROR = 1 + GOTO 9999 + ENDIF + DO I = IA, IA + M - 1 +C Scan current line until found first element + DO JBEGIN = IA2(I), IA2(I + 1) - 1 + IF (IA1(JBEGIN) .GE. JA) EXIT + ENDDO +C If reached last column end not yet +C encountered proper column, skip this row + IF ((JBEGIN .EQ. IA2(I + 1) - 1) .AND. + + (IA1(JBEGIN) .LT. JA)) CYCLE +C Now I'm sure there's at least one element +C to process: scan until found last element + AUX = JA + N - 1 + DO JEND = JBEGIN, IA2(I + 1) - 1 + IF (IA1(JEND) .GE. AUX) EXIT + ENDDO + IF (IA1(JEND) .GT. AUX) JEND = JEND - 1 + DO J = JBEGIN, JEND + A(J) = 0.0D0 + ENDDO + ENDDO + 9999 RETURN + END diff --git a/src/serial/csr/dcsrck.f b/src/serial/csr/dcsrck.f new file mode 100644 index 00000000..04df3924 --- /dev/null +++ b/src/serial/csr/dcsrck.f @@ -0,0 +1,131 @@ +C +C Purpose +C ======= +C +C Performing checks on sparse matrix. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit specify the error code. +C IERROR = 0 no errors +C IERROR > 0 error in integrity check + + + SUBROUTINE DCSRCK(TRANS,M,N,DESCRA,A,IA1,IA2, + + WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LWORK,M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION A(*), WORK(*) + INTEGER IA1(*), IA2(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER I, J, nrow, nind +C .. External Subroutines .. + EXTERNAL XERBLA +C +C .. Executable Statements .. +C + +C +C Check #1: Character descriptor have valid values +C + IERROR = 0 + + IF ((DESCRA(1:1).NE.'G').AND.(DESCRA(1:1).NE.'S').AND. + & (DESCRA(1:1).NE.'H').AND.(DESCRA(1:1).NE.'T').AND. + & (DESCRA(1:1).NE.'A').AND.(DESCRA(1:1).NE.'D')) THEN + IERROR = 11 + GOTO 9999 + END IF + IF ((DESCRA(2:2).NE.'U').AND.(DESCRA(2:2).NE.'L')) THEN + IERROR = 12 + GOTO 9999 + END IF + IF ((DESCRA(3:3).NE.'U').AND.(DESCRA(3:3).NE.'N')) THEN + IERROR = 13 + GOTO 9999 + END IF +C +C Check #2: Pointers have non decreasing order +C + IF (IA2(1).LE.0) THEN + IERROR = 14 + GOTO 9999 + ENDIF + + NROW = 0 + DO 10 I = 1, M + IF (IA2(I) .GT. IA2(I+1)) THEN + NROW = NROW + 1 + END IF + 10 CONTINUE + IF (NROW .GT. 0) THEN + IERROR = 15 + GOTO 9999 + END IF +C +C Check #3: Indices are within problem dimension +C + NIND = 0 + DO 20 I = 1, M + DO 30 J = IA2(I), IA2(I+1) - 1 + IF ((IA1(J).LT.0) .OR. (IA1(J).GT.N)) THEN + NIND = NIND + 1 + END IF + 30 CONTINUE + 20 CONTINUE + IF (NIND .GT. 0) THEN + IERROR = 16 + GOTO 9999 + END IF + 9999 CONTINUE + RETURN + END diff --git a/src/serial/csr/dcsrmm.f b/src/serial/csr/dcsrmm.f new file mode 100644 index 00000000..ba94a3a0 --- /dev/null +++ b/src/serial/csr/dcsrmm.f @@ -0,0 +1,114 @@ +c +c What if a wrong DESCRA is passed? +c +c +* +* + SUBROUTINE DCSRMM(TRANSA,M,K,N,ALPHA,DESCRA,AR, + * JA,IA,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C +C +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER K, LDB, LDC, M, N, LWORK,IERROR + CHARACTER TRANSA +C .. Array Arguments .. + DOUBLE PRECISION AR(*), B(LDB,*), C(LDC,*), WORK(*) + INTEGER IA(*), JA(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER I, J, K4 + CHARACTER DIAG, TRANS +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + +C .. External Subroutines .. + EXTERNAL DCSRMV +C .. Executable Statements .. +C +C + NAME = 'DCSRMM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) +C + IF (DESCRA(1:1).EQ.'G') TRANS = TRANSA + IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') TRANS = 'U' + IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'L') TRANS = 'L' +c +c Does DSRMV manage this case too? = 0 * +* DESCRIPTION: Number of rows of the matrix op(A). * +* * +* SYMBOLIC NAME: N * +* POSITION: PARAMETER NO 4. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: N >= 0 * +* DESCRIPTION: Number of columns of the matrix op(A) * +* * +* SYMBOLIC NAME: ALPHA * +* POSITION: PARAMETER NO 5. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar alpha. * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: PARAMETER NO 6. * +* ATTRIBUTES: REAL*8: ARRAY(IA(M+1)-1) * +* VALUES: ANY * +* DESCRIPTION: Array containing the non zero coefficients of * +* the sparse matrix op(A). * +* * +* SYMBOLIC NAME: JA * +* POSITION: PARAMETER NO 7. * +* ATTRIBUTES: INTEGER*4: ARRAY(IA(M+1)-1) * +* VALUES: 0 < JA(I) <= M * +* DESCRIPTION: Array containing the column number of the * +* nonzero coefficients stored in array AS. * +* * +* SYMBOLIC NAME: IA * +* POSITION: PARAMETER NO 8. * +* ATTRIBUTES: INTEGER*4: ARRAY(*) * +* VALUES: IA(I) > 0 * +* DESCRIPTION: Contains the pointers for the beginning of * +* each rows. * +* * +* * +* SYMBOLIC NAME: X * +* POSITION: PARAMETER NO 9. * +* ATTRIBUTES: REAL*8 ARRAY(N) (or ARRAY(M) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* multiplied by the matrix A. * +* * +* SYMBOLIC NAME: BETA * +* POSITION: PARAMETER NO 10. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar beta. * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* updated by the matrix-vector multiplication. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: PARAMETER NO 12. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Work area available to the program. It is used * +* only when TRANS = 'T'. * +* * +* OUTPUT = * +* * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector * +* updated by the matrix-vector multiplication. * +* * +* * +*********************************************************************** + SUBROUTINE DCSRMV(TRANS,DIAG,M,N,ALPHA,AS,JA,IA,X,BETA,Y, + + WORK,LWORK,IERROR) +C .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER (ONE=1.0D0,ZERO=0.0D0) +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER M, N,LWORK,IERROR + CHARACTER DIAG, TRANS +C .. Array Arguments .. + DOUBLE PRECISION AS(*), WORK(*), X(*), Y(*) + INTEGER IA(*), JA(*) +C .. Local Scalars .. + DOUBLE PRECISION ACC + INTEGER I, J, K, NCOLA, NROWA + LOGICAL SYM, TRA, UNI +C .. Executable Statements .. +C + IERROR = 0 + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + +C Symmetric matrix upper or lower + SYM = ((TRANS.EQ.'L').OR.(TRANS.EQ.'U')) +C + IF ( .NOT. TRA) THEN + NROWA = M + NCOLA = N + ELSE IF (TRA) THEN + NROWA = N + NCOLA = M + END IF !(....TRA) + + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, M + Y(I) = ZERO + ENDDO + ELSE + DO 20 I = 1, M + Y(I) = BETA*Y(I) + 20 CONTINUE + ENDIF + RETURN + END IF + +C + IF (SYM) THEN + IF (UNI) THEN +C +C ......Symmetric with unitary diagonal....... +C ....OK!! +C To be optimized + + IF (BETA.NE.ZERO) THEN + DO 40 I = 1, M +C +C Product for diagonal elements +C + Y(I) = BETA*Y(I) + ALPHA*X(I) + 40 CONTINUE + ELSE + DO I = 1, M + Y(I) = ALPHA*X(I) + ENDDO + ENDIF + +C Product for other elements + DO 80 I = 1, M + ACC = ZERO + DO 60 J = IA(I), IA(I+1) - 1 + K = JA(J) + Y(K) = Y(K) + ALPHA*AS(J)*X(I) + ACC = ACC + AS(J)*X(K) + 60 CONTINUE + Y(I) = Y(I) + ALPHA*ACC + 80 CONTINUE +C + ELSE IF ( .NOT. UNI) THEN +C +C Check if matrix is lower or upper +C + IF (TRANS.EQ.'L') THEN +C +C LOWER CASE: diagonal element is the last element of row +C ....OK! + + IF (BETA.NE.ZERO) THEN + DO 100 I = 1, M + Y(I) = BETA*Y(I) + 100 CONTINUE + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF + + DO 140 I = 1, M + ACC = ZERO + DO 120 J = IA(I), IA(I+1) - 1 ! it was -2 + K = JA(J) +C +C To be optimized +C + IF (K.NE.I) THEN !for symmetric element + Y(K) = Y(K) + ALPHA*AS(J)*X(I) + ENDIF + ACC = ACC + AS(J)*X(K) + 120 CONTINUE + + Y(I) = Y(I) + ALPHA*ACC + 140 CONTINUE + ELSE ! ....Trans<>L +C +C UPPER CASE +C ....OK!! +C + IF (BETA.NE.ZERO) THEN + DO 160 I = 1, M + Y(I) = BETA*Y(I) + 160 CONTINUE + ELSE + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF + + DO 200 I = 1, M + ACC = ZERO + DO 180 J = IA(I) , IA(I+1) - 1 ! removed +1 + K = JA(J) +C +C To be optimized +C + IF(K.NE.I) THEN + Y(K) = Y(K) + ALPHA*AS(J)*X(I) + ENDIF + ACC = ACC + AS(J)*X(K) + 180 CONTINUE + Y(I) = Y(I) + ALPHA*ACC + 200 CONTINUE + END IF ! ......TRANS=='L' + END IF ! ......Not UNI +C + ELSE IF ( .NOT. TRA) THEN !......NOT SYM + + IF ( .NOT. UNI) THEN +C +C .......General Not Unit, No Traspose +C + + IF (BETA.NE.ZERO) THEN + DO 240 I = 1, M + ACC = ZERO + DO 220 J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + 220 CONTINUE + Y(I) = ALPHA*ACC + BETA*Y(I) + 240 CONTINUE + ELSE + DO I = 1, M + ACC = ZERO + DO J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + ENDDO + Y(I) = ALPHA*ACC + ENDDO + ENDIF +C + ELSE IF (UNI) THEN +C + IF (BETA.NE.ZERO) THEN + DO 280 I = 1, M + ACC = ZERO + DO 260 J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + 260 CONTINUE + Y(I) = ALPHA*(ACC+X(I)) + BETA*Y(I) + 280 CONTINUE + ELSE !(BETA.EQ.ZERO) + DO I = 1, M + ACC = ZERO + DO J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + ENDDO + Y(I) = ALPHA*(ACC+X(I)) + ENDDO + ENDIF + END IF !....End Testing on UNI +C + ELSE IF (TRA) THEN !....Else on SYM (swapped M and N) +C + IF ( .NOT. UNI) THEN +C + IF (BETA.NE.ZERO) THEN + DO 300 I = 1, M + Y(I) = BETA*Y(I) + 300 CONTINUE + ELSE !(BETA.EQ.ZERO) + DO I = 1, M + Y(I) = ZERO + ENDDO + ENDIF +C + ELSE IF (UNI) THEN +C + + IF (BETA.NE.ZERO) THEN + DO 320 I = 1, M + Y(I) = BETA*Y(I) + ALPHA*X(I) + 320 CONTINUE + ELSE !(BETA.EQ.ZERO) + DO I = 1, M + Y(I) = ALPHA*X(I) + ENDDO + ENDIF + +C + END IF !....UNI +C + IF (ALPHA.EQ.ONE) THEN +C + DO 360 I = 1, N + DO 340 J = IA(I), IA(I+1) - 1 + K = JA(J) + Y(K) = Y(K) + AS(J)*X(I) + 340 CONTINUE + 360 CONTINUE +C + ELSE IF (ALPHA.EQ.-ONE) THEN +C + DO 400 I = 1, n + DO 380 J = IA(I), IA(I+1) - 1 + K = JA(J) + Y(K) = Y(K) - AS(J)*X(I) + 380 CONTINUE + 400 CONTINUE +C + ELSE !.....Else on TRA +C +C This work array is used for efficiency +C + IF (LWORK.LT.N) THEN + IERROR = 60 + WORK(1) = DBLE(N) + RETURN + ENDIF + DO 420 I = 1, N + WORK(I) = ALPHA*X(I) + 420 CONTINUE +C + DO 460 I = 1, n + DO 440 J = IA(I), IA(I+1) - 1 + K = JA(J) + Y(K) = Y(K) + AS(J)*WORK(I) + 440 CONTINUE + 460 CONTINUE +C + END IF !.....End testing on ALPHA + + END IF !.....End testing on SYM +C + RETURN +C +C END OF DSRMV +C + END + diff --git a/src/serial/csr/dcsrmv2.f b/src/serial/csr/dcsrmv2.f new file mode 100644 index 00000000..5adf5ac5 --- /dev/null +++ b/src/serial/csr/dcsrmv2.f @@ -0,0 +1,406 @@ +*********************************************************************** +* DSRMV modified for SPARKER +* * +* FUNCTION: Driver for routines performing one of the sparse * +* matrix vector operations * +* * +* y = alpha*op(A)*x + beta*y * +* * +* where op(A) is one of: * +* * +* op(A) = A or op(A) = A' or * +* op(A) = lower or upper part of A * +* * +* alpha and beta are scalars. * +* The data structure of the matrix is related * +* to the scalar computer. * +* This is an internal routine called by: * +* DSMMV * +* * +* ENTRY-POINT = DSRMV * +* INPUT = * +* * +* * +* SYMBOLIC NAME: TRANS * +* POSITION: PARAMETER NO 1. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'T' 'L' 'U' * +* DESCRIPTION: Specifies the form of op(A) to be used in the * +* matrix vector multiplications as follows: * +* * +* TRANS = 'N' , op( A ) = A. * +* * +* TRANS = 'T' , op( A ) = A'. * +* * +* TRANS = 'L' or 'U', op( A ) = lower or * +* upper part of A * +* * +* SYMBOLIC NAME: DIAG * +* POSITION: PARAMETER NO 2. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'U' * +* DESCRIPTION: * +* Specifies whether or not the matrix A has * +* unit diagonal as follows: * +* * +* DIAG = 'N' A is not assumed * +* to have unit diagonal * +* * +* DIAG = 'U' A is assumed * +* to have unit diagonal. * +* * +* WARNING: it is the caller's responsibility * +* to ensure that if the matrix has unit * +* diagonal, there are no elements of the * +* diagonal are stored in the arrays AS and JA. * +* * +* SYMBOLIC NAME: M * +* POSITION: PARAMETER NO 3. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: M >= 0 * +* DESCRIPTION: Number of rows of the matrix op(A). * +* * +* SYMBOLIC NAME: N * +* POSITION: PARAMETER NO 4. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: N >= 0 * +* DESCRIPTION: Number of columns of the matrix op(A) * +* * +* SYMBOLIC NAME: ALPHA * +* POSITION: PARAMETER NO 5. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar alpha. * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: PARAMETER NO 6. * +* ATTRIBUTES: REAL*8: ARRAY(IA(M+1)-1) * +* VALUES: ANY * +* DESCRIPTION: Array containing the non zero coefficients of * +* the sparse matrix op(A). * +* * +* SYMBOLIC NAME: JA * +* POSITION: PARAMETER NO 7. * +* ATTRIBUTES: INTEGER*4: ARRAY(IA(M+1)-1) * +* VALUES: 0 < JA(I) <= M * +* DESCRIPTION: Array containing the column number of the * +* nonzero coefficients stored in array AS. * +* * +* SYMBOLIC NAME: IA * +* POSITION: PARAMETER NO 8. * +* ATTRIBUTES: INTEGER*4: ARRAY(*) * +* VALUES: IA(I) > 0 * +* DESCRIPTION: Contains the pointers for the beginning of * +* each rows. * +* * +* * +* SYMBOLIC NAME: X * +* POSITION: PARAMETER NO 9. * +* ATTRIBUTES: REAL*8 ARRAY(N) (or ARRAY(M) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* multiplied by the matrix A. * +* * +* SYMBOLIC NAME: BETA * +* POSITION: PARAMETER NO 10. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar beta. * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* updated by the matrix-vector multiplication. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: PARAMETER NO 12. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Work area available to the program. It is used * +* only when TRANS = 'T'. * +* * +* OUTPUT = * +* * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector * +* updated by the matrix-vector multiplication. * +* * +* * +*********************************************************************** + SUBROUTINE DCSRMV2(TRANS,DIAG,M,N,ALPHA,AS,JA,IA,X,LDX, + + BETA,Y,LDY, WORK,LWORK,IERROR) + integer nb + parameter (nb=2) +C .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER (ONE=1.0D0,ZERO=0.0D0) +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER M, N,LWORK,IERROR,ldx,ldy + CHARACTER DIAG, TRANS +C .. Array Arguments .. + DOUBLE PRECISION AS(*), WORK(*), X(LDX,*), Y(LDY,*) + INTEGER IA(*), JA(*) +C .. Local Scalars .. + DOUBLE PRECISION ACC(nb) + INTEGER I, J, K, NCOLA, NROWA + LOGICAL SYM, TRA, UNI +C .. Executable Statements .. +C + IERROR = 0 + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + +C Symmetric matrix upper or lower + SYM = ((TRANS.EQ.'L').OR.(TRANS.EQ.'U')) +C + if ( .not. tra) then + nrowa = m + ncola = n + else if (tra) then + nrowa = n + ncola = m + end if !(....tra) + + if (alpha.eq.zero) then + if (beta.eq.zero) then + do i = 1, m + y(i,1:nb) = zero + enddo + else + do 20 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 20 continue + endif + return + end if + +c + if (sym) then + if (uni) then +c +c ......Symmetric with unitary diagonal....... +C ....OK!! +C To be optimized + + if (beta.ne.zero) then + do i = 1, m +C +C Product for diagonal elements +c + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + enddo + else + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +C Product for other elements + do 80 i = 1, m + acc = zero + do 60 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 60 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 80 continue +C + else if ( .not. uni) then +C +C Check if matrix is lower or upper +C + if (trans.eq.'L') then +C +C LOWER CASE: diagonal element is the last element of row +C ....OK! + + if (beta.ne.zero) then + do 100 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 100 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 140 i = 1, m + acc = zero + do 120 j = ia(i), ia(i+1) - 1 ! it was -2 + K = ja(j) +C +C To be optimized +C + if (k.ne.i) then !for symmetric element + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 120 continue + + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 140 continue + else ! ....Trans<>L +C +C UPPER CASE +C ....OK!! +C + if (beta.ne.zero) then + do 160 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 160 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 200 i = 1, m + acc = zero + do 180 j = ia(i) , ia(i+1) - 1 ! removed +1 + k = ja(j) +C +C To be optimized +C + if (k.ne.i) then + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 180 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 200 continue + end if ! ......TRANS=='L' + end if ! ......Not UNI +c + else if ( .not. tra) then !......NOT SYM + + if ( .not. uni) then +C +C .......General Not Unit, No Traspose +C + + if (beta.ne.zero) then + do 240 i = 1, m + acc(1:nb) = zero + do 220 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 220 continue + y(i,1:nb) = alpha*acc(1:nb) + beta*y(i,1:nb) + 240 continue + else + do i = 1, m + acc = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*acc(1:nb) + enddo + endif +c + else if (uni) then +c + if (beta.ne.zero) then + do 280 i = 1, m + acc(1:nb) = zero + do 260 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 260 continue + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + beta*y(i,1:nb) + 280 continue + else !(beta.eq.zero) + do i = 1, m + acc(1:nb) = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + enddo + endif + end if !....End Testing on UNI +C + else if (tra) then !....Else on SYM (swapped M and N) +C + if ( .not. uni) then +c + if (beta.ne.zero) then + do 300 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 300 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = zero + enddo + endif +c + else if (uni) then +c + + if (beta.ne.zero) then + do 320 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + 320 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +c + end if !....UNI +C + if (alpha.eq.one) then +c + do 360 i = 1, n + do 340 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + as(j)*x(i,1:nb) + 340 continue + 360 continue +c + else if (alpha.eq.-one) then +c + do 400 i = 1, n + do 380 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) - as(j)*x(i,1:nb) + 380 continue + 400 continue +c + else !.....Else on TRA +C +C This work array is used for efficiency +C + if (lwork.lt.n) then + ierror = 60 + work(1) = dble(n) + return + endif +c$$$ do 420 i = 1, n +c$$$ work(i) = alpha*x(i,1:4) +c$$$ 420 continue +c$$$C +c$$$ DO 460 I = 1, n +c$$$ DO 440 J = IA(I), IA(I+1) - 1 +c$$$ K = JA(J) +c$$$ Y(K) = Y(K) + AS(J)*WORK(I) +c$$$ 440 CONTINUE +c$$$ 460 CONTINUE +c + end if !.....end testing on alpha + + end if !.....end testing on sym +c + return +c +c end of dsrmv +c + end + diff --git a/src/serial/csr/dcsrmv3.f b/src/serial/csr/dcsrmv3.f new file mode 100644 index 00000000..819e30b6 --- /dev/null +++ b/src/serial/csr/dcsrmv3.f @@ -0,0 +1,406 @@ +*********************************************************************** +* DSRMV modified for SPARKER +* * +* FUNCTION: Driver for routines performing one of the sparse * +* matrix vector operations * +* * +* y = alpha*op(A)*x + beta*y * +* * +* where op(A) is one of: * +* * +* op(A) = A or op(A) = A' or * +* op(A) = lower or upper part of A * +* * +* alpha and beta are scalars. * +* The data structure of the matrix is related * +* to the scalar computer. * +* This is an internal routine called by: * +* DSMMV * +* * +* ENTRY-POINT = DSRMV * +* INPUT = * +* * +* * +* SYMBOLIC NAME: TRANS * +* POSITION: PARAMETER NO 1. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'T' 'L' 'U' * +* DESCRIPTION: Specifies the form of op(A) to be used in the * +* matrix vector multiplications as follows: * +* * +* TRANS = 'N' , op( A ) = A. * +* * +* TRANS = 'T' , op( A ) = A'. * +* * +* TRANS = 'L' or 'U', op( A ) = lower or * +* upper part of A * +* * +* SYMBOLIC NAME: DIAG * +* POSITION: PARAMETER NO 2. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'U' * +* DESCRIPTION: * +* Specifies whether or not the matrix A has * +* unit diagonal as follows: * +* * +* DIAG = 'N' A is not assumed * +* to have unit diagonal * +* * +* DIAG = 'U' A is assumed * +* to have unit diagonal. * +* * +* WARNING: it is the caller's responsibility * +* to ensure that if the matrix has unit * +* diagonal, there are no elements of the * +* diagonal are stored in the arrays AS and JA. * +* * +* SYMBOLIC NAME: M * +* POSITION: PARAMETER NO 3. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: M >= 0 * +* DESCRIPTION: Number of rows of the matrix op(A). * +* * +* SYMBOLIC NAME: N * +* POSITION: PARAMETER NO 4. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: N >= 0 * +* DESCRIPTION: Number of columns of the matrix op(A) * +* * +* SYMBOLIC NAME: ALPHA * +* POSITION: PARAMETER NO 5. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar alpha. * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: PARAMETER NO 6. * +* ATTRIBUTES: REAL*8: ARRAY(IA(M+1)-1) * +* VALUES: ANY * +* DESCRIPTION: Array containing the non zero coefficients of * +* the sparse matrix op(A). * +* * +* SYMBOLIC NAME: JA * +* POSITION: PARAMETER NO 7. * +* ATTRIBUTES: INTEGER*4: ARRAY(IA(M+1)-1) * +* VALUES: 0 < JA(I) <= M * +* DESCRIPTION: Array containing the column number of the * +* nonzero coefficients stored in array AS. * +* * +* SYMBOLIC NAME: IA * +* POSITION: PARAMETER NO 8. * +* ATTRIBUTES: INTEGER*4: ARRAY(*) * +* VALUES: IA(I) > 0 * +* DESCRIPTION: Contains the pointers for the beginning of * +* each rows. * +* * +* * +* SYMBOLIC NAME: X * +* POSITION: PARAMETER NO 9. * +* ATTRIBUTES: REAL*8 ARRAY(N) (or ARRAY(M) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* multiplied by the matrix A. * +* * +* SYMBOLIC NAME: BETA * +* POSITION: PARAMETER NO 10. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar beta. * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* updated by the matrix-vector multiplication. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: PARAMETER NO 12. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Work area available to the program. It is used * +* only when TRANS = 'T'. * +* * +* OUTPUT = * +* * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector * +* updated by the matrix-vector multiplication. * +* * +* * +*********************************************************************** + SUBROUTINE DCSRMV3(TRANS,DIAG,M,N,ALPHA,AS,JA,IA,X,LDX, + + BETA,Y,LDY, WORK,LWORK,IERROR) + integer nb + parameter (nb=3) +C .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER (ONE=1.0D0,ZERO=0.0D0) +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER M, N,LWORK,IERROR,ldx,ldy + CHARACTER DIAG, TRANS +C .. Array Arguments .. + DOUBLE PRECISION AS(*), WORK(*), X(LDX,*), Y(LDY,*) + INTEGER IA(*), JA(*) +C .. Local Scalars .. + DOUBLE PRECISION ACC(nb) + INTEGER I, J, K, NCOLA, NROWA + LOGICAL SYM, TRA, UNI +C .. Executable Statements .. +C + IERROR = 0 + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + +C Symmetric matrix upper or lower + SYM = ((TRANS.EQ.'L').OR.(TRANS.EQ.'U')) +C + if ( .not. tra) then + nrowa = m + ncola = n + else if (tra) then + nrowa = n + ncola = m + end if !(....tra) + + if (alpha.eq.zero) then + if (beta.eq.zero) then + do i = 1, m + y(i,1:nb) = zero + enddo + else + do 20 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 20 continue + endif + return + end if + +c + if (sym) then + if (uni) then +c +c ......Symmetric with unitary diagonal....... +C ....OK!! +C To be optimized + + if (beta.ne.zero) then + do i = 1, m +C +C Product for diagonal elements +c + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + enddo + else + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +C Product for other elements + do 80 i = 1, m + acc = zero + do 60 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 60 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 80 continue +C + else if ( .not. uni) then +C +C Check if matrix is lower or upper +C + if (trans.eq.'L') then +C +C LOWER CASE: diagonal element is the last element of row +C ....OK! + + if (beta.ne.zero) then + do 100 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 100 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 140 i = 1, m + acc = zero + do 120 j = ia(i), ia(i+1) - 1 ! it was -2 + K = ja(j) +C +C To be optimized +C + if (k.ne.i) then !for symmetric element + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 120 continue + + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 140 continue + else ! ....Trans<>L +C +C UPPER CASE +C ....OK!! +C + if (beta.ne.zero) then + do 160 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 160 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 200 i = 1, m + acc = zero + do 180 j = ia(i) , ia(i+1) - 1 ! removed +1 + k = ja(j) +C +C To be optimized +C + if (k.ne.i) then + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 180 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 200 continue + end if ! ......TRANS=='L' + end if ! ......Not UNI +c + else if ( .not. tra) then !......NOT SYM + + if ( .not. uni) then +C +C .......General Not Unit, No Traspose +C + + if (beta.ne.zero) then + do 240 i = 1, m + acc(1:nb) = zero + do 220 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 220 continue + y(i,1:nb) = alpha*acc(1:nb) + beta*y(i,1:nb) + 240 continue + else + do i = 1, m + acc = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*acc(1:nb) + enddo + endif +c + else if (uni) then +c + if (beta.ne.zero) then + do 280 i = 1, m + acc(1:nb) = zero + do 260 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 260 continue + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + beta*y(i,1:nb) + 280 continue + else !(beta.eq.zero) + do i = 1, m + acc(1:nb) = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + enddo + endif + end if !....End Testing on UNI +C + else if (tra) then !....Else on SYM (swapped M and N) +C + if ( .not. uni) then +c + if (beta.ne.zero) then + do 300 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 300 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = zero + enddo + endif +c + else if (uni) then +c + + if (beta.ne.zero) then + do 320 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + 320 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +c + end if !....UNI +C + if (alpha.eq.one) then +c + do 360 i = 1, n + do 340 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + as(j)*x(i,1:nb) + 340 continue + 360 continue +c + else if (alpha.eq.-one) then +c + do 400 i = 1, n + do 380 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) - as(j)*x(i,1:nb) + 380 continue + 400 continue +c + else !.....Else on TRA +C +C This work array is used for efficiency +C + if (lwork.lt.n) then + ierror = 60 + work(1) = dble(n) + return + endif +c$$$ do 420 i = 1, n +c$$$ work(i) = alpha*x(i,1:4) +c$$$ 420 continue +c$$$C +c$$$ DO 460 I = 1, n +c$$$ DO 440 J = IA(I), IA(I+1) - 1 +c$$$ K = JA(J) +c$$$ Y(K) = Y(K) + AS(J)*WORK(I) +c$$$ 440 CONTINUE +c$$$ 460 CONTINUE +c + end if !.....end testing on alpha + + end if !.....end testing on sym +c + return +c +c end of dsrmv +c + end + diff --git a/src/serial/csr/dcsrmv4.f b/src/serial/csr/dcsrmv4.f new file mode 100644 index 00000000..7c6e1675 --- /dev/null +++ b/src/serial/csr/dcsrmv4.f @@ -0,0 +1,406 @@ +*********************************************************************** +* DSRMV modified for SPARKER +* * +* FUNCTION: Driver for routines performing one of the sparse * +* matrix vector operations * +* * +* y = alpha*op(A)*x + beta*y * +* * +* where op(A) is one of: * +* * +* op(A) = A or op(A) = A' or * +* op(A) = lower or upper part of A * +* * +* alpha and beta are scalars. * +* The data structure of the matrix is related * +* to the scalar computer. * +* This is an internal routine called by: * +* DSMMV * +* * +* ENTRY-POINT = DSRMV * +* INPUT = * +* * +* * +* SYMBOLIC NAME: TRANS * +* POSITION: PARAMETER NO 1. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'T' 'L' 'U' * +* DESCRIPTION: Specifies the form of op(A) to be used in the * +* matrix vector multiplications as follows: * +* * +* TRANS = 'N' , op( A ) = A. * +* * +* TRANS = 'T' , op( A ) = A'. * +* * +* TRANS = 'L' or 'U', op( A ) = lower or * +* upper part of A * +* * +* SYMBOLIC NAME: DIAG * +* POSITION: PARAMETER NO 2. * +* ATTRIBUTES: CHARACTER*1 * +* VALUES: 'N' 'U' * +* DESCRIPTION: * +* Specifies whether or not the matrix A has * +* unit diagonal as follows: * +* * +* DIAG = 'N' A is not assumed * +* to have unit diagonal * +* * +* DIAG = 'U' A is assumed * +* to have unit diagonal. * +* * +* WARNING: it is the caller's responsibility * +* to ensure that if the matrix has unit * +* diagonal, there are no elements of the * +* diagonal are stored in the arrays AS and JA. * +* * +* SYMBOLIC NAME: M * +* POSITION: PARAMETER NO 3. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: M >= 0 * +* DESCRIPTION: Number of rows of the matrix op(A). * +* * +* SYMBOLIC NAME: N * +* POSITION: PARAMETER NO 4. * +* ATTRIBUTES: INTEGER*4. * +* VALUES: N >= 0 * +* DESCRIPTION: Number of columns of the matrix op(A) * +* * +* SYMBOLIC NAME: ALPHA * +* POSITION: PARAMETER NO 5. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar alpha. * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: PARAMETER NO 6. * +* ATTRIBUTES: REAL*8: ARRAY(IA(M+1)-1) * +* VALUES: ANY * +* DESCRIPTION: Array containing the non zero coefficients of * +* the sparse matrix op(A). * +* * +* SYMBOLIC NAME: JA * +* POSITION: PARAMETER NO 7. * +* ATTRIBUTES: INTEGER*4: ARRAY(IA(M+1)-1) * +* VALUES: 0 < JA(I) <= M * +* DESCRIPTION: Array containing the column number of the * +* nonzero coefficients stored in array AS. * +* * +* SYMBOLIC NAME: IA * +* POSITION: PARAMETER NO 8. * +* ATTRIBUTES: INTEGER*4: ARRAY(*) * +* VALUES: IA(I) > 0 * +* DESCRIPTION: Contains the pointers for the beginning of * +* each rows. * +* * +* * +* SYMBOLIC NAME: X * +* POSITION: PARAMETER NO 9. * +* ATTRIBUTES: REAL*8 ARRAY(N) (or ARRAY(M) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* multiplied by the matrix A. * +* * +* SYMBOLIC NAME: BETA * +* POSITION: PARAMETER NO 10. * +* ATTRIBUTES: REAL*8. * +* VALUES: any. * +* DESCRIPTION: Specifies the scalar beta. * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector to be * +* updated by the matrix-vector multiplication. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: PARAMETER NO 12. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Work area available to the program. It is used * +* only when TRANS = 'T'. * +* * +* OUTPUT = * +* * +* * +* SYMBOLIC NAME: Y * +* POSITION: PARAMETER NO 11. * +* ATTRIBUTES: REAL*8 ARRAY(M) (or ARRAY(N) when op(A) = A') * +* VALUES: any. * +* DESCRIPTION: Contains the values of the vector * +* updated by the matrix-vector multiplication. * +* * +* * +*********************************************************************** + SUBROUTINE DCSRMV4(TRANS,DIAG,M,N,ALPHA,AS,JA,IA,X,LDX, + + BETA,Y,LDY, WORK,LWORK,IERROR) + integer nb + parameter (nb=4) +C .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER (ONE=1.0D0,ZERO=0.0D0) +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER M, N,LWORK,IERROR,ldx,ldy + CHARACTER DIAG, TRANS +C .. Array Arguments .. + DOUBLE PRECISION AS(*), WORK(*), X(LDX,*), Y(LDY,*) + INTEGER IA(*), JA(*) +C .. Local Scalars .. + DOUBLE PRECISION ACC(nb) + INTEGER I, J, K, NCOLA, NROWA + LOGICAL SYM, TRA, UNI +C .. Executable Statements .. +C + IERROR = 0 + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + +C Symmetric matrix upper or lower + SYM = ((TRANS.EQ.'L').OR.(TRANS.EQ.'U')) +C + if ( .not. tra) then + nrowa = m + ncola = n + else if (tra) then + nrowa = n + ncola = m + end if !(....tra) + + if (alpha.eq.zero) then + if (beta.eq.zero) then + do i = 1, m + y(i,1:nb) = zero + enddo + else + do 20 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 20 continue + endif + return + end if + +c + if (sym) then + if (uni) then +c +c ......Symmetric with unitary diagonal....... +C ....OK!! +C To be optimized + + if (beta.ne.zero) then + do i = 1, m +C +C Product for diagonal elements +c + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + enddo + else + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +C Product for other elements + do 80 i = 1, m + acc = zero + do 60 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 60 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 80 continue +C + else if ( .not. uni) then +C +C Check if matrix is lower or upper +C + if (trans.eq.'L') then +C +C LOWER CASE: diagonal element is the last element of row +C ....OK! + + if (beta.ne.zero) then + do 100 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 100 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 140 i = 1, m + acc = zero + do 120 j = ia(i), ia(i+1) - 1 ! it was -2 + K = ja(j) +C +C To be optimized +C + if (k.ne.i) then !for symmetric element + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 120 continue + + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 140 continue + else ! ....Trans<>L +C +C UPPER CASE +C ....OK!! +C + if (beta.ne.zero) then + do 160 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 160 continue + else + do i = 1, m + y(i,1:nb) = zero + enddo + endif + + do 200 i = 1, m + acc = zero + do 180 j = ia(i) , ia(i+1) - 1 ! removed +1 + k = ja(j) +C +C To be optimized +C + if (k.ne.i) then + y(k,1:nb) = y(k,1:nb) + alpha*as(j)*x(i,1:nb) + endif + acc(1:nb) = acc(1:nb) + as(j)*x(k,1:nb) + 180 continue + y(i,1:nb) = y(i,1:nb) + alpha*acc(1:nb) + 200 continue + end if ! ......TRANS=='L' + end if ! ......Not UNI +c + else if ( .not. tra) then !......NOT SYM + + if ( .not. uni) then +C +C .......General Not Unit, No Traspose +C + + if (beta.ne.zero) then + do 240 i = 1, m + acc(1:nb) = zero + do 220 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 220 continue + y(i,1:nb) = alpha*acc(1:nb) + beta*y(i,1:nb) + 240 continue + else + do i = 1, m + acc = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*acc(1:nb) + enddo + endif +c + else if (uni) then +c + if (beta.ne.zero) then + do 280 i = 1, m + acc(1:nb) = zero + do 260 j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + 260 continue + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + beta*y(i,1:nb) + 280 continue + else !(beta.eq.zero) + do i = 1, m + acc(1:nb) = zero + do j = ia(i), ia(i+1) - 1 + acc(1:nb) = acc(1:nb) + as(j)*x(ja(j),1:nb) + enddo + y(i,1:nb) = alpha*(acc(1:nb)+x(i,1:nb)) + enddo + endif + end if !....End Testing on UNI +C + else if (tra) then !....Else on SYM (swapped M and N) +C + if ( .not. uni) then +c + if (beta.ne.zero) then + do 300 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + 300 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = zero + enddo + endif +c + else if (uni) then +c + + if (beta.ne.zero) then + do 320 i = 1, m + y(i,1:nb) = beta*y(i,1:nb) + alpha*x(i,1:nb) + 320 continue + else !(BETA.EQ.ZERO) + do i = 1, m + y(i,1:nb) = alpha*x(i,1:nb) + enddo + endif + +c + end if !....UNI +C + if (alpha.eq.one) then +c + do 360 i = 1, n + do 340 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) + as(j)*x(i,1:nb) + 340 continue + 360 continue +c + else if (alpha.eq.-one) then +c + do 400 i = 1, n + do 380 j = ia(i), ia(i+1) - 1 + k = ja(j) + y(k,1:nb) = y(k,1:nb) - as(j)*x(i,1:nb) + 380 continue + 400 continue +c + else !.....Else on TRA +C +C This work array is used for efficiency +C + if (lwork.lt.n) then + ierror = 60 + work(1) = dble(n) + return + endif +c$$$ do 420 i = 1, n +c$$$ work(i) = alpha*x(i,1:4) +c$$$ 420 continue +c$$$C +c$$$ DO 460 I = 1, n +c$$$ DO 440 J = IA(I), IA(I+1) - 1 +c$$$ K = JA(J) +c$$$ Y(K) = Y(K) + AS(J)*WORK(I) +c$$$ 440 CONTINUE +c$$$ 460 CONTINUE +c + end if !.....end testing on alpha + + end if !.....end testing on sym +c + return +c +c end of dsrmv +c + end + diff --git a/src/serial/csr/dcsrprt.f b/src/serial/csr/dcsrprt.f new file mode 100644 index 00000000..5515dd05 --- /dev/null +++ b/src/serial/csr/dcsrprt.f @@ -0,0 +1,54 @@ +c +c What if a wrong DESCRA is passed? +c +c +* +* + SUBROUTINE DCSRPRT(M,N,DESCRA,AR,JA,IA,TITLE,IOUT) +C +C +C .. Scalar Arguments .. + INTEGER M, N, IOUT +C .. Array Arguments .. + DOUBLE PRECISION AR(*) + INTEGER IA(*), JA(*) + CHARACTER DESCRA*11, TITLE*(*) +C .. Local Scalars .. + INTEGER I, J, nnzero + + +C .. External Subroutines .. +C +C + if ((descra(1:1).eq.'g').or.(descra(1:1).eq.'G')) then + write(iout,fmt=998) + else if ((descra(1:1).eq.'s').or.(descra(1:1).eq.'S')) then + write(iout,fmt=997) + else + write(iout,fmt=998) + endif + nnzero = ia(m+1) -1 + write(iout,fmt=992) + write(iout,fmt=996) + write(iout,fmt=996) title + write(iout,fmt=995) 'Number of rows: ',m + write(iout,fmt=995) 'Number of columns: ',n + write(iout,fmt=995) 'Nonzero entries: ',nnzero + write(iout,fmt=996) + write(iout,fmt=992) + write(iout,*) m,n,nnzero + 998 format('%%MatrixMarket matrix coordinate real general') + 997 format('%%MatrixMarket matrix coordinate real symmetric') + 992 format('%======================================== ') + 996 format('% ',a) + 995 format('% ',a,i9,a,i9,a,i9) + + do i=1, m + do j=ia(i),ia(i+1)-1 + write(iout,fmt=994) i,ja(j),ar(j) + 994 format(i6,1x,i6,1x,e16.8) + enddo + enddo + + RETURN + END diff --git a/src/serial/csr/dcsrsm.f b/src/serial/csr/dcsrsm.f new file mode 100644 index 00000000..28a2bf43 --- /dev/null +++ b/src/serial/csr/dcsrsm.f @@ -0,0 +1,85 @@ + SUBROUTINE DCSRSM(TRANST,M,N,UNITD,D,ALPHA,DESCRA,A,JA,IA, + * B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + DOUBLE PRECISION ALPHA, BETA + INTEGER LDB, LDC, LWORK, M, N, IERROR + CHARACTER UNITD, TRANST + DOUBLE PRECISION A(*), B(LDB,*), C(LDC,*), D(*), WORK(*) + INTEGER IA(*), JA(*) + CHARACTER DESCRA*11 + INTEGER I, K + CHARACTER DIAG, UPLO + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) +C .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + + NAME = 'DCSRSM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF((ALPHA.NE.1.D0) .OR. (BETA.NE.0.D0))then + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + UPLO = '?' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') UPLO = 'U' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') UPLO = 'L' + IF (UPLO.EQ.'?') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + IF (DESCRA(3:3).EQ.'N') DIAG = 'N' + IF (DESCRA(3:3).EQ.'U') DIAG = 'U' + IF(UNITD.EQ.'B') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + IF (UNITD.EQ.'R') THEN + DO 40 I = 1, N + DO 20 K = 1, M + B(K,I) = B(K,I)*D(K) + 20 CONTINUE + 40 CONTINUE + END IF + + DO 60 I = 1, N + CALL DCSRSV(UPLO,TRANST,DIAG,M,A,JA,IA,B(1,I),C(1,I)) + 60 CONTINUE + IF(IERROR.NE.0) THEN + INT_VAL(1)=IERROR + CALL FCPSB_ERRPUSH(4012,NAME,INT_VAL) + GOTO 9999 + END IF + + if (debug) then + write(0,*) 'Check from DCSRSM' + do k=1,m + write(0,*) k, b(k,1),c(k,1) + enddo + endif + + IF (UNITD.EQ.'L') THEN + DO 45 I = 1, N + DO 25 K = 1, M + C(K,I) = C(K,I)*D(K) + 25 CONTINUE + 45 CONTINUE + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/csr/dcsrsv.f b/src/serial/csr/dcsrsv.f new file mode 100644 index 00000000..18db010a --- /dev/null +++ b/src/serial/csr/dcsrsv.f @@ -0,0 +1,100 @@ + SUBROUTINE DCSRSV(UPLO,TRANS,DIAG,N,AS,JA,IA,B,X,IERROR) + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D0) + INTEGER N, IERROR + CHARACTER DIAG, TRANS, UPLO + DOUBLE PRECISION AS(*), B(*), X(*) + INTEGER IA(*), JA(*) + DOUBLE PRECISION ACC + INTEGER I, J, K + LOGICAL LOW, TRA, UNI + UNI = (DIAG.EQ.'U') + TRA = (TRANS.EQ.'T') + LOW = (UPLO.EQ.'L') + IF ( .NOT. TRA) THEN + IF (LOW) THEN + IF ( .NOT. UNI) THEN + DO 40 I = 1, N + ACC = ZERO + DO 20 J = IA(I), IA(I+1) - 2 + ACC = ACC + AS(J)*X(JA(J)) + 20 CONTINUE + X(I) = (B(I)-ACC)/AS(IA(I+1)-1) + 40 CONTINUE + ELSE IF (UNI) THEN + DO 80 I = 1, N + ACC = ZERO + DO 60 J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + 60 CONTINUE + X(I) = B(I) - ACC + 80 CONTINUE + END IF + ELSE IF ( .NOT. LOW) THEN + IF ( .NOT. UNI) THEN + DO 120 I = N, 1, -1 + ACC = ZERO + DO 100 J = IA(I) + 1, IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + 100 CONTINUE + X(I) = (B(I)-ACC)/AS(IA(I)) + 120 CONTINUE + ELSE IF (UNI) THEN + DO 160 I = N, 1, -1 + ACC = ZERO + DO 140 J = IA(I), IA(I+1) - 1 + ACC = ACC + AS(J)*X(JA(J)) + 140 CONTINUE + X(I) = B(I) - ACC + 160 CONTINUE + END IF + END IF + ELSE IF (TRA) THEN + DO 180 I = 1, N + X(I) = B(I) + 180 CONTINUE + IF (LOW) THEN + IF ( .NOT. UNI) THEN + DO 220 I = N, 1, -1 + X(I) = X(I)/AS(IA(I+1)-1) + ACC = X(I) + DO 200 J = IA(I), IA(I+1) - 2 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 200 CONTINUE + 220 CONTINUE + ELSE IF (UNI) THEN + DO 260 I = N, 1, -1 + ACC = X(I) + DO 240 J = IA(I), IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 240 CONTINUE + 260 CONTINUE + END IF + ELSE IF ( .NOT. LOW) THEN + IF ( .NOT. UNI) THEN + DO 300 I = 1, N + X(I) = X(I)/AS(IA(I)) + ACC = X(I) + DO 280 J = IA(I) + 1, IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 280 CONTINUE + 300 CONTINUE + ELSE IF (UNI) THEN + DO 340 I = 1, N + ACC = X(I) + DO 320 J = IA(I), IA(I+1) - 1 + K = JA(J) + X(K) = X(K) - AS(J)*ACC + 320 CONTINUE + 340 CONTINUE + END IF + END IF + END IF + RETURN + END + + + diff --git a/src/serial/dp/Makefile b/src/serial/dp/Makefile new file mode 100644 index 00000000..7ae0d7eb --- /dev/null +++ b/src/serial/dp/Makefile @@ -0,0 +1,46 @@ +include ../../../../Make.inc +# +# The object files +# + +FOBJS = dcrcr.o dcrdi.o dcrel.o dcrjd.o dgblock.o partition.o \ + dgindex.o djadrp.o djadrp1.o dcsrrp.o dcsrp1.o check_dim.o \ + Max_nnzero.o dcoco.o dcocr.o dcrco.o dcrinco.o djdcox.o djdco.o dvtfg.o dgind_tri.o \ + gen_block.o dcoinco.o reordvn.o zreordvn.o\ + zcrcr.o zcsrrp.o zcsrp1.o zgindex.o zgind_tri.o zcocr.o zcrinco.o zcoco.o + +# +# dgind_tri.o +# + + +OBJS=$(FOBJS) + +# +# Where the library should go, and how it is called. +# Note that we are regenerating most of libsparker.a on the fly. +#LIBDIR=../../LIB +#LIBNAME=libsparker.a +LIBFILE=$(LIBDIR)/$(LIBNAME) +SPARKERDIR=.. +INCDIRS=-I. -I$(SPARKERDIR) -I$(LIBDIR) + +# +# No change should be needed below +# + + +lib: $(FOBJS) + $(AR) $(LIBFILE) $(OBJS) + $(RANLIB) $(LIBFILE) + +$(FOBJS): $(SPARKERDIR)/sparker.fh + +clean: cleanobjs + +veryclean: cleanobjs + +cleanobjs: + /bin/rm -f $(OBJS) + + diff --git a/src/serial/dp/Max_nnzero.f b/src/serial/dp/Max_nnzero.f new file mode 100644 index 00000000..243493c4 --- /dev/null +++ b/src/serial/dp/Max_nnzero.f @@ -0,0 +1,17 @@ + INTEGER FUNCTION MAX_NNZERO(M, IA2) + IMPLICIT NONE + + INTEGER M + INTEGER IA2(*) + + INTEGER I,MAX_NZ + + MAX_NZ = 0 + + DO I = 1, M + MAX_NZ = MAX(MAX_NZ,IA2(I+1)- IA2(I)) + ENDDO + + MAX_NNZERO = MAX_NZ + END + diff --git a/src/serial/dp/check_dim.f b/src/serial/dp/check_dim.f new file mode 100644 index 00000000..e54272a3 --- /dev/null +++ b/src/serial/dp/check_dim.f @@ -0,0 +1,57 @@ + SUBROUTINE CHECK_DIM(M, N, IA, NG, IA2, + + NZ, LARN, LIAN1, LIAN2, IERRV) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C +C .. Scalar Arguments .. + INTEGER M,N,NG,LARN,LIAN1,LIAN2, NZ + +C .. Array Arguments .. + INTEGER IA(3,*), RES(*), IA2(*), IERRV(*) + +C Local scalars + INTEGER NNZ, BLOCK, DIM_BLOCK, LIMIT + INTEGER MAX_NNZERO, MAX_NZ + + EXTERNAL MAX_NNZERO + + MAX_NZ = MAX_NNZERO(M,IA2) + + NNZ = NZ + + LIMIT = INT(DIM_BLOCK*PERCENT) + + DO BLOCK = 1, NG + DIM_BLOCK = IA(1,BLOCK+1)-IA(1,BLOCK) + LIMIT = INT(DIM_BLOCK*PERCENT) + + NNZ = NNZ+(DIM_BLOCK-LIMIT)*MAX_NZ + END DO + + IERRV(1)=0 + IERRV(2) = NNZ + IERRV(3) = NNZ + IERRV(4) = 6+3*(NG+1)+M+MAX_NZ*NG+1 + IF (6+3*(NG+1)+M+MAX_NZ*NG+1.GT.LIAN2) THEN + IERRV(1) = 30 +c$$$ write(0,*) 'check_dim: error 1', +c$$$ + 6+3*(NG+1)+M+MAX_NZ*NG+1,LIAN2 + ENDIF + + IF (NNZ.GT.LIAN1) THEN +c$$$ write(0,*) 'check_dim: error 2',nnz,lian1 + IERRV(1) = 31 + ENDIF + + IF (NNZ.GT.LARN) THEN +c$$$ write(0,*) 'check_dim: error 3',nnz,larn + IERRV(1) = 32 + ENDIF + + RETURN + END + + + diff --git a/src/serial/dp/dcoco.f b/src/serial/dp/dcoco.f new file mode 100644 index 00000000..c5173036 --- /dev/null +++ b/src/serial/dp/dcoco.f @@ -0,0 +1,250 @@ +C Covert matrix from COO format to COO Format +C + SUBROUTINE DCOCO(TRANS,M,N,UNITD,D,DESCRA,AR,IA1,IA2,INFO, + * P1,DESCRN,ARN,IA1N,IA2N,INFON,P2,LARN,LIA1N, + * LIA2N,AUX,LAUX,IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C .. Scalar Arguments .. + INTEGER LARN, LAUX, LIA1N, LIA2N, + + M, N, IERROR + CHARACTER TRANS,UNITD +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*), D(*) + INTEGER AUX(0:LAUX-1) + INTEGER IA1(*), IA2(*), INFO(*), IA1N(*), IA2N(*), + * INFON(*), P1(*), P2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER IPX, IP1, IP2, CHECK_FLAG + INTEGER NNZ, K, I, J, NZL, IRET + INTEGER ELEM_IN, ELEM_OUT + LOGICAL SCALE + INTEGER MAX_NNZERO + logical debug + parameter (debug=.false.) +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C +C ...Common variables... +C This flag describe the action to do + +C .. External Subroutines .. + EXTERNAL MAX_NNZERO +C .. Executable Statements .. +C + + NAME = 'DCOCO\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + CHECK_FLAG=IBITS(info(upd_),1,2) + IF (TRANS.EQ.'N') THEN + SCALE = (UNITD.EQ.'L') ! meaningless + P1(1) = 0 + P2(1) = 0 + + NNZ = INFO(nnz_) + if (debug) then + write(*,*) 'On entry to DCOCO: NNZ LAUX ', + + nnz,laux,larn,lia1n,lia2n + endif + IF (LAUX.LT.NNZ+2) THEN + IERROR = 60 + INT_VAL(1) = 22 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LARN.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 18 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA1N.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 19 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA2N.LT.M+1) THEN + IERROR = 60 + INT_VAL(1) = 20 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ENDIF + +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (DESCRA(1:1).EQ.'G') THEN +C +C Sort COO data structure +C + if (debug) write(*,*)'First sort',nnz + do k=1, nnz + arn(k) = ar(k) + ia1n(k) = ia1(k) + ia2n(k) = ia2(k) + enddo + + if (debug) write(*,*)'Second sort' + + if ((lia2n.ge.(2*nnz+ireg_flgs+1)) + + .and.(laux.ge.2*(2+nnz))) then +C +C Prepare for smart regeneration +c + ipx = nnz+3 + do i=1, nnz + aux(ipx+i-1) = i + enddo + ip1 = nnz+2 + infon(upd_pnt_) = ip1 + ip2 = ip1+ireg_flgs + ia2n(ip1+ip2_) = ip2 + ia2n(ip1+iflag_) = check_flag + ia2n(ip1+nnzt_) = nnz + ia2n(ip1+nnz_) = 0 + ia2n(ip1+ichk_) = nnz+check_flag + if (debug) write(0,*) 'Build check :',ia2n(ip1+nnzt_) + +C .... Order with key IA1N ... + CALL MRGSRT(NNZ,IA1N,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN3(NNZ,ARN,IA1N,IA2N,AUX(IPX),AUX) +C .... Order with key IA2N ... + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA1N(J).EQ.IA1N(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,IA2N(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN3(NZL,ARN(I),IA1N(I),IA2N(I), + + AUX(IPX+I-1),AUX) + I = J + ENDDO + + ia2n(ip2+aux(ipx+1-1)-1) = 1 + +C ... Construct final COO Representation... + ELEM_OUT = 1 +C ... Insert remaining element ... + DO ELEM_IN = 2, NNZ + IF ((IA1N(ELEM_IN).EQ.IA1N(ELEM_OUT)).AND. + + (IA2N(ELEM_IN).EQ.IA2N(ELEM_OUT))) THEN + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the first duplicated element ... + ia2n(ip2+aux(ipx+elem_in-1)-1) = elem_out + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + ARN(ELEM_OUT) = ARN(ELEM_OUT) + ARN(ELEM_IN) + ia2n(ip2+aux(ipx+elem_in-1)-1) = elem_out + END IF + ELSE + ELEM_OUT = ELEM_OUT + 1 + ARN(ELEM_OUT) = ARN(ELEM_IN) + ia2n(ip2+aux(ipx+elem_in-1)-1) = elem_out + IA1N(ELEM_OUT) = IA1N(ELEM_IN) + IA2N(ELEM_OUT) = IA2N(ELEM_IN) + ENDIF + ENDDO + + ELSE + +C .... Order with key IA1N ... + CALL MRGSRT(NNZ,IA1N,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NNZ,ARN,IA1N,IA2N,AUX) +C .... Order with key IA2N ... + + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA1N(J).EQ.IA1N(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,IA2N(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,ARN(I),IA1N(I),IA2N(I), + + AUX) + I = J + ENDDO +C ... Construct final COO Representation... + ELEM_OUT = 1 +C ... Insert remaining element ... + DO ELEM_IN = 2, NNZ + IF ((IA1N(ELEM_IN).EQ.IA1N(ELEM_OUT)).AND. + + (IA2N(ELEM_IN).EQ.IA2N(ELEM_OUT))) THEN + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the first duplicated element ... + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + ARN(ELEM_OUT) = ARN(ELEM_OUT) + ARN(ELEM_IN) + END IF + ELSE + ELEM_OUT = ELEM_OUT + 1 + ARN(ELEM_OUT) = ARN(ELEM_IN) + IA1N(ELEM_OUT) = IA1N(ELEM_IN) + IA2N(ELEM_OUT) = IA2N(ELEM_IN) + ENDIF + ENDDO + ENDIF + INFON(nnz_) = ELEM_OUT + infon(srtd_) = isrtdcoo + + if (debug) write(*,*)'Done Rebuild COO',infon(1) + + ELSE IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') THEN + + DO 20 K = 1, M + P2(K) = K + 20 CONTINUE + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') THEN + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') THEN + + END IF +C + ELSE IF (TRANS.NE.'N') THEN +C +C TO DO +C + IERROR = 3021 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dcocr.f b/src/serial/dp/dcocr.f new file mode 100644 index 00000000..fbe6be7d --- /dev/null +++ b/src/serial/dp/dcocr.f @@ -0,0 +1,525 @@ +C Covert matrix from COO format to CSR Format +C Note: this never sets IP1 and P2! +C + SUBROUTINE DCOCR(TRANS,M,N,UNITD,D,DESCRA,AR,JA,IA,INFO, + * P1,DESCRN,ARN,IAN1,IAN2,INFON,P2,LARN,LIAN1, + * LIAN2,AUX,LAUX,IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C +C .. Scalar Arguments .. + INTEGER LARN, LAUX, LAUX2, LIAN1, LIAN2, M, + + N, IUPDUP, IERROR + CHARACTER TRANS,UNITD +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*), D(*) + INTEGER AUX(0:LAUX-1) + INTEGER JA(*), IA(*), INFO(*), IAN1(*), IAN2(*), + * INFON(*), P1(*), P2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER NNZ, K, ROW, I, J, NZL, IRET + integer ipx, ip1, ip2, CHECK_FLAG + INTEGER ELEM, ELEM_CSR + LOGICAL SCALE + INTEGER MAX_NNZERO + logical debug + parameter (debug=.false.) +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + +C +C ...Common variables... + +C .. External Subroutines .. + EXTERNAL MAX_NNZERO +C .. Executable Statements .. +C + + NAME = 'DCOCR\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + CHECK_FLAG=IBITS(INFO(UPD_),1,2) +c$$$ write(0,*) 'DCOCR FLAG ',info(upd_),check_flag + IF (TRANS.EQ.'N') THEN + IERRV(1) = 0 + SCALE = (UNITD.EQ.'L') ! meaningless + P1(1) = 0 + P2(1) = 0 + + NNZ = INFO(1) + if (debug) then + write(0,*) 'On entry to DCOCR: NNZ LAUX ', + + nnz,laux,larn,lian1,lian2 + endif + IF (LAUX.LT.NNZ+2) THEN + IERROR = 60 + INT_VAL(1) = 22 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LARN.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 18 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA1N.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 19 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA2N.LT.M+1) THEN + IERROR = 60 + INT_VAL(1) = 20 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ENDIF + +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (DESCRA(1:1).EQ.'G') THEN +C +C Sort COO data structure +C + if (debug) write(0,*)'First sort',nnz +c$$$ if (debug) then +c$$$ do k=1,nnz +c$$$ write(*,*) k,ia(k),ja(k),ar(k) +c$$$ enddo +c$$$ endif + if ((lian2.ge.((m+1)+nnz+ireg_flgs+1)) + + .and.(laux.ge.2*(2+nnz))) then +C +C Prepare for smart regeneration +c + + ipx = nnz+3 + do i=1, nnz + aux(ipx+i-1) = i + enddo + ip1 = m+2 + infon(upd_pnt_) = ip1 + ip2 = ip1+ireg_flgs + ian2(ip1+ip2_) = ip2 + ian2(ip1+iflag_) = check_flag + ian2(ip1+nnzt_) = nnz + ian2(ip1+nnz_) = 0 + ian2(ip1+ichk_) = nnz+check_flag + +c$$$ write(0,*)'DCOCR m,ip1,ip2,nnz',m, +c$$$ + ip1,ip2,nnz,ian2(ip1+nnzt_) + + if (debug) write(0,*) 'Build check :',ian2(ip1+nnzt_) +C .... Order with key IA ... + CALL MRGSRT(NNZ,IA,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN3(NNZ,AR,IA,JA,AUX(IPX),AUX) + if (debug) then + do i=1, nnz-1 + if (ia(i).gt.ia(i+1)) then + write(0,*) 'Sorting error:',i,ia(i),ia(i+1) + endif + enddo + write(0,*) 'nnz :',m,nnz,ia(nnz),ja(nnz) + endif + +C .... Order with key IA2N ... + + I = 1 + J = I +c$$$ DO WHILE (I.LE.NNZ) +c$$$ DO WHILE ((IA(J).EQ.IA(I)).AND. +c$$$ + (J.LE.NNZ)) + DO + if (I>NNZ) exit + DO + if (j>nnz) exit + if (ia(j) /= ia(i)) exit + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JA(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN3(NZL,AR(I),IA(I),JA(I), + + AUX(IPX+I-1),AUX) + I = J + ENDDO + + + + + +C ... Construct CSR Representation... + ELEM = 1 + ELEM_CSR = 1 +C ... Insert first element ... + DO ROW = 1, IA(1) + IAN2(ROW) = 1 + ENDDO + if (debug) write(0,*)'Rebuild CSR',ia(1),elem_csr + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ian2(ip2+aux(ipx+elem-1)-1) = elem_csr + ELEM = ELEM+1 + ELEM_CSR = ELEM_CSR+1 +C ... Insert remaining element ... + DO ROW = IA(1), M +c$$$ if (debug) write(*,*)'CSR Loop:',row,m,elem_csr +c$$$ DO WHILE ((IA(ELEM).EQ.ROW).AND.(ELEM.LE.NNZ)) + DO + if (elem > nnz) exit + if (ia(elem) /= row) exit + IF (IA(ELEM).NE.IA(ELEM-1)) THEN +C ... Insert first element of a row ... + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ian2(ip2+aux(ipx+elem-1)-1) = elem_csr + ELEM_CSR = ELEM_CSR+1 + ELSE IF (JA(ELEM).NE.JA(ELEM-1)) THEN +C ... Insert other element of row ... + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ian2(ip2+aux(ipx+elem-1)-1) = elem_csr + ELEM_CSR = ELEM_CSR+1 + ELSE + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the last duplicated element ... + ARN(ELEM_CSR-1) = AR(ELEM) + ian2(ip2+aux(ipx+elem-1)-1) = elem_csr-1 + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + ARN(ELEM_CSR-1) = ARN(ELEM_CSR-1) + AR(ELEM) + ian2(ip2+aux(ipx+elem-1)-1) = elem_csr-1 + END IF + ENDIF + ELEM = ELEM + 1 + ENDDO + IAN2(ROW+1) = ELEM_CSR + ENDDO + ELSE +C .... Order with key IA ... + CALL MRGSRT(NNZ,IA,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NNZ,AR,IA,JA,AUX) +C .... Order with key IA2N ... + I = 1 + J = I +c$$$ DO WHILE (I.LE.NNZ) +c$$$ DO WHILE ((IA(J).EQ.IA(I)).AND. +c$$$ + (J.LE.NNZ)) + DO + if (I>NNZ) exit + DO + if (j>nnz) exit + if (ia(j) /= ia(i)) exit + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JA(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,AR(I),IA(I),JA(I),AUX) + I = J + ENDDO + + + +C ... Construct CSR Representation... + ELEM = 1 + ELEM_CSR = 1 +C ... Insert first element ... + DO ROW = 1, IA(1) + IAN2(ROW) = 1 + ENDDO + if (debug) write(0,*)'Rebuild CSR',ia(1),elem_csr + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM = ELEM+1 + ELEM_CSR = ELEM_CSR+1 +C ... Insert remaining element ... + DO ROW = IA(1), M +c$$$ if (debug) write(*,*)'CSR Loop:',row,m,elem_csr +c$$$ DO WHILE ((IA(ELEM).EQ.ROW).AND.(ELEM.LE.NNZ)) + DO + if (elem > nnz) exit + if (ia(elem) /= row) exit + IF (IA(ELEM).NE.IA(ELEM-1)) THEN +C ... Insert first element of a row ... + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ELSE IF (JA(ELEM).NE.JA(ELEM-1)) THEN +C ... Insert other element of row ... + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ELSE + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the last duplicated element ... + ARN(ELEM_CSR-1) = AR(ELEM) + if (debug) write(0,*) 'Duplicated overwrite', + + elem_csr-1,elem + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + ARN(ELEM_CSR-1) = ARN(ELEM_CSR-1) + AR(ELEM) + if (debug) write(0,*) 'Duplicated add', + + elem_csr-1,elem + END IF + ENDIF + ELEM = ELEM + 1 + ENDDO + IAN2(ROW+1) = ELEM_CSR + ENDDO + ENDIF + + if (debug) write(0,*)'Done Rebuild CSR', + + ian2(m+1),ia(elem) + if (debug) then + do i=ian2(m+1), nnz + write(0,*) 'Overflow check :',ia(i),ja(i),ar(i) + enddo + endif + + ELSE IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') THEN + + DO 20 K = 1, M + P2(K) = K + 20 CONTINUE + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') THEN + +C .... Order with key IA ... + CALL MRGSRT(NNZ,IA,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NNZ,AR,IA,JA,AUX) +C .... Order with key IA2N ... + I = 1 + J = I +c$$$ DO WHILE (I.LE.NNZ) +c$$$ DO WHILE ((IA(J).EQ.IA(I)).AND. +c$$$ + (J.LE.NNZ)) + DO + if (I>NNZ) exit + DO + if (j>nnz) exit + if (ia(j) /= ia(i)) exit + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JA(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,AR(I),IA(I),JA(I),AUX) + I = J + ENDDO + + +C ... Construct CSR Representation... + ELEM = 1 + ELEM_CSR = 1 +C ... Insert first element ... + DO ROW = 1, IA(1) + IAN2(ROW) = 1 + ENDDO + if (debug) write(0,*)'Rebuild CSR',ia(1),elem_csr + IF(JA(ELEM).GT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + + ELEM = ELEM+1 + +C ... Insert remaining element ... + DO ROW = IA(1), M +c$$$ if (debug) write(*,*)'CSR Loop:',row,m,elem_csr +c$$$ DO WHILE ((IA(ELEM).EQ.ROW).AND.(ELEM.LE.NNZ)) + DO + if (elem > nnz) exit + if (ia(elem) /= row) exit + IF (IA(ELEM).NE.IA(ELEM-1)) THEN +C ... Insert first element of a row ... + IF(JA(ELEM).GT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + ELSE IF (JA(ELEM).NE.JA(ELEM-1)) THEN +C ... Insert other element of row ... + IF(JA(ELEM).GT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + ELSE + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the last duplicated element ... + IF(JA(ELEM).GT.IA(ELEM)) THEN + ARN(ELEM_CSR-1) = AR(ELEM) + ENDIF + if (debug) write(0,*) 'Duplicated overwrite', + + elem_csr-1,elem + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + IF(JA(ELEM).GT.IA(ELEM)) THEN + ARN(ELEM_CSR-1) = ARN(ELEM_CSR-1) + AR(ELEM) + ENDIF + if (debug) write(0,*) 'Duplicated add', + + elem_csr-1,elem + END IF + ENDIF + ELEM = ELEM + 1 + ENDDO + IAN2(ROW+1) = ELEM_CSR + ENDDO + + + if (debug) write(0,*)'Done Rebuild CSR', + + ian2(m+1),ia(elem) + if (debug) then + do i=ian2(m+1), nnz + write(0,*) 'Overflow check :',ia(i),ja(i),ar(i) + enddo + endif + + + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') THEN + +C .... Order with key IA ... + CALL MRGSRT(NNZ,IA,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NNZ,AR,IA,JA,AUX) +C .... Order with key IA2N ... + I = 1 + J = I +c$$$ DO WHILE (I.LE.NNZ) +c$$$ DO WHILE ((IA(J).EQ.IA(I)).AND. +c$$$ + (J.LE.NNZ)) + DO + if (I>NNZ) exit + DO + if (j>nnz) exit + if (ia(j) /= ia(i)) exit + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JA(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,AR(I),IA(I),JA(I),AUX) + I = J + ENDDO + +C ... Construct CSR Representation... + ELEM = 1 + ELEM_CSR = 1 +C ... Insert first element ... + DO ROW = 1, IA(1) + IAN2(ROW) = 1 + ENDDO + if (debug) write(0,*)'Rebuild CSR',ia(1),elem_csr + IF(JA(ELEM).LT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + ELEM = ELEM+1 + +C ... Insert remaining element ... + DO ROW = IA(1), M +c$$$ if (debug) write(*,*)'CSR Loop:',row,m,elem_csr +c$$$ DO WHILE ((IA(ELEM).EQ.ROW).AND.(ELEM.LE.NNZ)) + DO + if (elem > nnz) exit + if (ia(elem) /= row) exit + IF (IA(ELEM).NE.IA(ELEM-1)) THEN +C ... Insert first element of a row ... + IF(JA(ELEM).LT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + ELSE IF (JA(ELEM).NE.JA(ELEM-1)) THEN +C ... Insert other element of row ... + IF(JA(ELEM).LT.IA(ELEM)) THEN + IAN1(ELEM_CSR) = JA(ELEM) + ARN(ELEM_CSR) = AR(ELEM) + ELEM_CSR = ELEM_CSR+1 + ENDIF + ELSE + IF (CHECK_FLAG.EQ.1) THEN +C ... Error, there are duplicated elements ... + IERROR = 130 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE IF (CHECK_FLAG.EQ.2) THEN +C ... Insert only the last duplicated element ... + IF(JA(ELEM).LT.IA(ELEM)) THEN + ARN(ELEM_CSR-1) = AR(ELEM) + ENDIF + if (debug) write(0,*) 'Duplicated overwrite', + + elem_csr-1,elem + ELSE IF (CHECK_FLAG.EQ.3) THEN +C ... Sum the duplicated element ... + IF(JA(ELEM).LT.IA(ELEM)) THEN + ARN(ELEM_CSR-1) = ARN(ELEM_CSR-1) + AR(ELEM) + ENDIF + if (debug) write(0,*) 'Duplicated add', + + elem_csr-1,elem + END IF + ENDIF + ELEM = ELEM + 1 + ENDDO + IAN2(ROW+1) = ELEM_CSR + ENDDO + + + if (debug) write(0,*)'Done Rebuild CSR', + + ian2(m+1),ia(elem) + if (debug) then + do i=ian2(m+1), nnz + write(0,*) 'Overflow check :',ia(i),ja(i),ar(i) + enddo + endif + + + END IF +C + ELSE IF (TRANS.NE.'N') THEN +C +C TO DO +C + IERROR = 3021 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + INFON(1)=ELEM_CSR-1 + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dcoinco.f b/src/serial/dp/dcoinco.f new file mode 100644 index 00000000..0ecff660 --- /dev/null +++ b/src/serial/dp/dcoinco.f @@ -0,0 +1,82 @@ + SUBROUTINE DCOINCO(M,N,DESCRA,A,IA1,IA2, + + INFOA,IA,JA,LATOT,LIA1TOT,LIA2TOT, + + DESCRH,H,IH1,IH2,INFOH,IH,JH,WORK,LWORK,IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' +C .. Scalar Arguments .. + INTEGER LWORK, M, N, IERROR + INTEGER LATOT,LIA1TOT,LIA2TOT,IA,JA,IH,JH +C .. Array Arguments .. + DOUBLE PRECISION A(*), H(*), WORK(LWORK) + INTEGER IA1(*), IA2(*), IH1(*), IH2(*), + + INFOA(*), INFOH(*) + CHARACTER DESCRA*11, DESCRH*11 + +C .. Local scalars .. + INTEGER I, J, NZH, nza +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + + NAME = 'DCOINCO\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + NZH = INFOH(NNZ_) + NZA = INFOA(NNZ_) + + if ((nza+nzh).le.min(latot,lia1tot,lia2tot)) then +C +C In this case we are (hopefully) safe +C + +C Insert Element in COO Format + DO J = 1, NZH + IF ((IH1(J).GE.IH).AND.(IH1(J).LT.IH+M).AND. + $ (IH2(J).GE.JH).AND.(IH2(J).LT.JH+N)) THEN +C If current element belongs to submatrix to insert + nza = nza + 1 + A(nza) = H(J) + IA1(nza) = IH1(J)+IA-IH + IA2(nza) = IH2(J)+JA-JH + ENDIF + ENDDO + + else + +C +C Most likely will have to give up, but try anyway +C +C Insert Element in COO Format + DO J = 1, NZH + IF ((IH1(J).GE.IH).AND.(IH1(J).LT.IH+M).AND. + $ (IH2(J).GE.JH).AND.(IH2(J).LT.JH+N)) THEN +C If current element belongs to submatrix to insert + nza = nza + 1 + + IF ((nza.le.LATOT) .and.(nza.le.LIA1TOT) + + .and.(nza.le.LIA2TOT)) THEN + A(nza) = H(J) + IA1(nza) = IH1(J)+IA-IH + IA2(nza) = IH2(J)+JA-JH + else + IF (nza.GT.LATOT) THEN + IERRV(1) = 10 + IERRV(2) = nza + ELSE IF (nza.GT.LIA1TOT) THEN + IERRV(1) = 20 + IERRV(2) = nza + ELSE IF (nza.GT.LIA2TOT) THEN + IERRV(1) = 30 + IERRV(2) = nza + ENDIF + RETURN + endif + ENDIF + ENDDO + endif + infoa(nnz_) = nza + + return + END diff --git a/src/serial/dp/dcrco.f b/src/serial/dp/dcrco.f new file mode 100644 index 00000000..fe0f4a68 --- /dev/null +++ b/src/serial/dp/dcrco.f @@ -0,0 +1,128 @@ + SUBROUTINE DCRCO(TRANS,M,N,UNITD,D,DESCRA,AR,IA1,IA2,INFO, + * IP1,DESCRN,ARN,IAN1,IAN2,INFON,IP2,LARN,LIAN1, + * LIAN2,AUX,LAUX,IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C +C .. Scalar Arguments .. + INTEGER LARN, LAUX, LIAN1, LIAN2, M, N, IERROR + CHARACTER TRANS,UNITD +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*), D(*), AUX(LAUX) + INTEGER IA1(*), IA2(*), INFO(*), IAN1(*), IAN2(*), + * INFON(*), IP1(*), IP2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER NNZ, K, ROW, J + INTEGER ELEM + LOGICAL SCALE + INTEGER MAX_NNZERO +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + +C .. External Subroutines .. + EXTERNAL MAX_NNZERO +C .. Executable Statements .. +C + + NAME = 'DCRCO\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (TRANS.EQ.'N') THEN + SCALE = (UNITD.EQ.'L') ! meaningless + IP1(1) = 0 + IP2(1) = 0 + NNZ = IA2(M+1)-1 + IF (LARN.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 18 + INT_VAL(2) = NNZ + INT_VAL(3) = LARN + ELSE IF (LIAN1.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 19 + INT_VAL(2) = NNZ + INT_VAL(3) = LIAN1 + ELSE IF (LIAN2.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 20 + INT_VAL(2) = NNZ + INT_VAL(3) = LIAN2 + ENDIF + + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (DESCRA(1:1).EQ.'G') THEN +C ... Construct COO Representation... + ELEM = 1 + + DO ROW = 1, M + DO J = IA2(ROW), IA2(ROW+1)-1 + IAN1(ELEM) = ROW + IAN2(ELEM) = IA1(J) + ARN(ELEM) = AR(J) + ELEM = ELEM + 1 + ENDDO + ENDDO + INFON(1) = IA2(M+1)-1 + ELSE IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') THEN + + DO 20 K = 1, M + IP2(K) = K + 20 CONTINUE + +c$$$ CALL DVSSG(M,IA1,IA2,IP2,IAN2(PNG),IP1,IP2,AUX(IWLEN), +c$$$ * AUX(IWORK1)) +c$$$ CALL DVSMR(M,AR,IA1,IA2,IAN2(PNG),AUX(IWLEN),IP1,IP2, +c$$$ * IAN2(PIA),IAN2(PJA),IAN1,ARN,AUX(IWORK1), +c$$$ * AUX(IWORK2),NJA,IER,SCALE) +C + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') THEN + +c$$$ CALL DVTFG('U',M,IA1,IA2,IAN2(PNG),IP1,IP2,AUX(IWLEN), +c$$$c * AUX(IWORK1),AUX(IWORK2),IAN1(M+1)) +c$$$ * AUX(IWORK1),IAN1(1),IAN1(M+5)) +c$$$ CALL DVTMR(M,AR,IA1,IA2,ISTROW,IAN2(PNG),AUX(IWLEN),IP1,IP2, +c$$$ * IAN2(PIA),IAN2(PJA),IAN1,ARN,NJA,IER,SCALE) +C + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') THEN + +c$$$ CALL DVTFG('L',M,IA1,IA2,IAN2(PNG),IP1,IP2,AUX(IWLEN), +c$$$c * AUX(IWORK1),AUX(IWORK2),IAN1(M+1)) +c$$$ * AUX(IWORK1),IAN1(1),IAN1(M+5)) +c$$$ CALL DVTMR(M,AR,IA1,IA2,ISTROW,IAN2(PNG),AUX(IWLEN),IP1,IP2, +c$$$ * IAN2(PIA),IAN2(PJA),IAN1,ARN,NJA,IER,SCALE) + + END IF +C + ELSE IF (TRANS.NE.'N') THEN +C +C TO DO +C + IERROR = 3021 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dcrcr.f b/src/serial/dp/dcrcr.f new file mode 100644 index 00000000..29d99fa5 --- /dev/null +++ b/src/serial/dp/dcrcr.f @@ -0,0 +1,280 @@ +C SUBROUTINE DCRCR(TRANS,M,N,UNITD,D,DESCRA,A,IA1,IA2,INFOA,IP1, +C DESCRN,AN,IAN1,IAN2,INFON,IP2,LAN,LIAN1,LIAN2, +C WORK,LWORK,IERROR) +C +C Purpose: CSR to CSR format conversion +C ======= +C +C Parameter: +C ========= +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will convert +C matrix A or the transpose of A as follows: +C TRANS = 'N' -> convert matrix A +C TRANS = 'T' or 'C' -> convert A' (the transpose of A) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A (A') +C and number of rows of matrix H +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A (A') +C and number of columns of matrix H +C Unchanged on exit. +C +C UNITD - CHARACTER*1 +C On entry UNITD specifies whether the diagonal matrix is unit +C or whether row or column scaling has to be performed, as follows: +C UNITD = 'U' -> unit matrix (no scaling) +C UNITD = 'L' -> scale on the left (row scaling) +C UNITD = 'R' -> scale on the right (column scaling) +C UNITD = 'B' -> scale on the right and on the left +C with D^1/2 +C Unchanged on exit. +C +C D - DOUBLE PRECISION array of dimension (M) +C On entry D specifies the main diagonal of the matrix used +C for scaling. +C Unchanged on exit. +C +C DESCRA - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRA describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C IP1 - INTEGER array of dimension (M) +C On exit IP1 specifies the row permutation of matrix AN +C (IP1(1) == 0 if no permutation). +C +C DESCRN - CHARACTER*1 array of DIMENSION (9) +C On exit DESCRN describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C AN - DOUBLE PRECISION array of DIMENSION (LAN) +C On exit AN specifies the values of the output sparse +C matrix. If LAN=0, INT(AN(1)) is the minimum value for LAN +C satisfying DSPDP memory requirements. +C +C IAN1 - INTEGER array of dimension (LIAN1) +C On exit IAN1 holds integer information on output sparse +C matrix. Actual information will depend on data format used. +C If LIAN1=0, INT(IAN1(1)) is the minimum value for LIAN1 +C satisfying DSPDP memory requirements. +C +C IAN2 - INTEGER array of dimension (LIAN2) +C On exit IAN2 holds integer information on output sparse +C matrix. Actual information will depend on data format used. +C If LIAN2=0, INT(IAN2(1)) is the minimum value for LIAN2 +C satisfying DSPDP memory requirements. +C +C INFON - INTEGER array of dimension (10) +C On exit can hold auxiliary information on output matrices +C formats or environment of subsequent calls. +C +C IP2 - INTEGER array of dimension (M) +C On exit IP2 specifies the column permutation of matrix AN +C (IP2(1) == 0 if no permutation). +C +C LAN - INTEGER +C On entry LAN specifies the dimension of AN +C LAN must satisfy memory required from the new data structure. +C Unchanged on exit. +C +C LIAN1 - INTEGER +C On entry LH1 specifies the dimension of IAN1 +C LH1 must satisfy memory required from the new data structure. +C Unchanged on exit. +C +C LIAN2 - INTEGER +C On entry LIAN2 specifies the dimension of IAN2 +C LIAN2 must satisfy memory required from the new data structure. +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPDP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C LWORK must satisfy memory necessary for the data conversion. +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 error +C +C + SUBROUTINE DCRCR(TRANS,M,N,UNITD,D,DESCRA,A,IA1,IA2,INFOA,IP1, + * DESCRN,AN,IAN1,IAN2,INFON,IP2,LAN,LIAN1,LIAN2, + * WORK,LWORK,IERROR) + IMPLICIT NONE +C +C .. Scalar Arguments .. + INTEGER M, N, LAN, LIAN1, LIAN2, LWORK, IERROR + CHARACTER TRANS, UNITD +C .. Array Arguments .. + DOUBLE PRECISION A(*), AN(*), D(*), WORK(LWORK) + INTEGER IA1(*), IA2(*), IAN1(*), IAN2(*), IP1(*), IP2(*), + * INFOA(*), INFON(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER I, J + LOGICAL EXIT +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + +C .. Intrinsic Functions .. + INTRINSIC DBLE, DSQRT + +C .. Executable Statements .. +C + EXIT=.FALSE. + NAME = 'DCOCO\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) +C +C Check for argument errors +C + IF(((DESCRA(1:1) .EQ. 'S' .OR. DESCRA(1:1) .EQ. 'H' .OR. + & DESCRA(1:1) .EQ. 'A') .AND. (UNITD .NE. 'B')) .OR. + & (.NOT.((DESCRA(3:3).EQ.'N').OR.(DESCRA(3:3).EQ.'L').OR. + + (DESCRA(3:3).EQ.'U'))) .OR. + + TRANS.NE.'N') THEN + IERROR = 20 + ENDIF + IF(LAN.LT.(IA2(M+1)-1)) THEN + IF (LAN.LE.0) THEN + EXIT=.TRUE. + AN(1) = DBLE(IA2(M+1)-1) + ELSE + IERROR = 21 + ENDIF + ENDIF + IF(LIAN1.LT.(IA2(M+1)-1)) THEN + IF (LAN.LE.0) THEN + EXIT=.TRUE. + IAN1(1) = IA2(M+1)-1 + ELSE + IERROR = 22 + ENDIF + ENDIF + IF(LIAN2.LT.(M+1)) THEN + IF (LAN.LE.0) THEN + EXIT=.TRUE. + IAN2(1) = M+1 + ELSE + IERROR = 23 + ENDIF + ENDIF + IF ((DESCRA(1:1) .EQ. 'S' .OR. DESCRA(1:1) .EQ. 'H' .OR. + & DESCRA(1:1) .EQ. 'A') .AND. (UNITD .EQ. 'B')) THEN + IF (LWORK.LT.M) THEN + IF (LWORK.LE.0) THEN + EXIT=.TRUE. + ELSE + IERROR = 25 + ENDIF + WORK(1) = DBLE(M) + ENDIF + ELSE + IF (LWORK.LT.0) THEN + WORK(1) = 0.D0 + ENDIF + ENDIF +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (EXIT) goto 9998 +C +C Set DESCRN, IP1, IP2 +C + DESCRN(1:3) = DESCRA(1:3) + + IP1(1)=0 + IP2(1)=0 +C +C Compute output matrix +C + DO 20 I = 1, M+1 + IAN2(I) = IA2(I) + 20 CONTINUE + IF ((DESCRA(1:1) .EQ. 'S' .OR. DESCRA(1:1) .EQ. 'H' .OR. + & DESCRA(1:1) .EQ. 'A') .AND. (UNITD .EQ. 'B')) THEN + DO 30 I = 1, M + WORK(I) = DSQRT(D(I)) + 30 CONTINUE + DO 40 I = 1, M + DO 50 J = IA2(I), IA2(I+1)-1 + AN(J) = WORK(I) * A(J) * WORK(IA1(J)) + IAN1(J) = IA1(J) + 50 CONTINUE + 40 CONTINUE + ELSE IF (UNITD .EQ. 'L') THEN + DO 60 I = 1, M + DO 70 J = IA2(I), IA2(I+1)-1 + AN(J) = D(I) * A(J) + IAN1(J) = IA1(J) + 70 CONTINUE + 60 CONTINUE + ELSE IF (UNITD .EQ. 'R') THEN + DO 80 I = 1, M + DO 90 J = IA2(I), IA2(I+1)-1 + AN(J) = A(J) * D(IA1(J)) + IAN1(J) = IA1(J) + 90 CONTINUE + 80 CONTINUE + ELSE IF (UNITD .EQ. 'U') THEN + DO 100 J = 1, IA2(M+1)-1 + AN(J) = A(J) + IAN1(J) = IA1(J) + 100 CONTINUE + ENDIF + + 9998 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + + diff --git a/src/serial/dp/dcrdi.f b/src/serial/dp/dcrdi.f new file mode 100644 index 00000000..c6c25120 --- /dev/null +++ b/src/serial/dp/dcrdi.f @@ -0,0 +1,222 @@ +C SUBROUTINE DCRDI(TRANS,M,N,DESCRA,A,IA1,IA2,IP1,DESCRN, +C AN,IAN1,IAN2,IP2,LAN,LIAN1,LIAN2, +C IAUX,LIAUX,IERRV) +C +C Purpose: CSR to DIA format conversion +C ======= +C +C Parameter: +C ========= +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether A needs to be transposed +C Unchanged on exit. +C +C M - INTEGER +C On entry M specifies the number of rows of the matrix A. +C M must be greater than zero. +C Unchanged on exit. +C +C N - INTEGER +C On entry M specifies the number of columns of the matrix A. +C N must be equal to M (for the time being). +C Unchanged on exit. +C +C K - INTEGER +C On entry K specifies the number of columns of the matrix A. +C K must be greater than or equal to zero. +C Not used, because matrix supposed to be square. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (9) +C On entry DESCRA defines the format of the sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix in CSR storage. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on columns of input +C sparse matrix A, i.e. which column corresponding element in +C A belongs to. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds rows pointers +C Unchanged on exit. +C +C DESCRN - CHARACTER*5 array of DIMENSION (9) +C On entry DESCRN defines the new format of the sparse matrix. +C Unchanged on exit. +C +C AN - DOUBLE PRECISION array of DIMENSION (*) +C On exit AN specifies the values of the input sparse +C matrix in DIA storage (by diagonals). +C +C IAN1 - INTEGER array of dimension (*) +C On exit IAN1 holds integer information on columns of output +C sparse matrix A, i.e. which diagonal is stored in each column. +C +C IAN2 - INTEGER array of dimension (*) +C On exit IAN2 holds in the first element the number of diagonals +C of the matrix, i.e. the number of columns of output matrix AN. +C +C IAUX - INTEGER array of DIMENSION(LIAUX) +C Work area. +C +C LIAUX - INTEGER +C On entry LIAUX specifies the dimension of IAUX. +C LIAUX must be greater than zero. +C Unchanged on exit. +C +C IERRV - INTEGER array of dimension ..... +C On exit specifies if an error occur as follow: +C IERRV(1) = 0 no error +C IERRV(1) > 0 error +C +C + SUBROUTINE DCRDI(TRANS,M,N,DESCRA,A,IA1,IA2,IP1,DESCRN, + * AN,IAN1,IAN2,IP2,LAN,LIAN1,LIAN2, + * IAUX,LIAUX,IERRV) + IMPLICIT NONE +C +C .. Scalar Arguments .. + INTEGER M, N, LAN, LIAN1, LIAN2, LIAUX + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION A(*), AN(*) + INTEGER IA1(*), IA2(*), IAN1(*), IAN2(*), IP1(*), IP2(*), + * IAUX(LIAUX), IERRV(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER I, J, K, MAXND +C .. External Subroutines .. +C EXTERNAL XSPERR +C .. Executable Statements .. +C + +C +C Check for argument errors +C + IF (TRANS.NE.'T' .AND. TRANS.NE.'N') THEN + CALL XSPERR('TRANS ',ICHAR(TRANS),1,'DCRDI',IERRV) + ENDIF + IF (M.LE.0) THEN + CALL XSPERR('MATDIM ',M,2,'DCRDI',IERRV) + ENDIF + IF (N.LE.0) THEN + CALL XSPERR('MATDIM ',N,3,'DCRDI',IERRV) + ENDIF + IF(LIAN2.LT.1) THEN + CALL XSPERR('MATST ',LIAN2,16,'DCRDI',IERRV) + LIAN2 = 1 + ENDIF + MAXND = M + N - 1 + IF (LIAUX.LT.MAXND) THEN + CALL XSPERR('LWORK ',LIAUX,18,'DCRDI',IERRV) + LIAUX = MAXND + ENDIF + IF (IERRV(1).NE.0) RETURN + + + DO J=1,MAXND + IAUX(J)=0 + ENDDO + DO I=1,M ! single out diagonals + DO J=IA2(I),IA2(I+1)-1 + IAUX(M-I+IA1(J))=1 + ENDDO + ENDDO + + IAN2(1)=0 ! Computing number of diagonals + DO J=1,MAXND + IAN2(1)=IAN2(1)+IAUX(J) + ENDDO + + IF (LAN.LT.(M*IAN2(1))) THEN + CALL XSPERR('MATST ',LAN,14,'DCRDI',IERRV) + LAN = M * IAN2(1) + ENDIF + IF (LIAN1.LT.IAN2(1)) THEN + CALL XSPERR('MATST ',LIAN1,15,'DCRDI',IERRV) + LIAN1 = IAN2(1) + ENDIF + IF (IERRV(1).NE.0) RETURN + + DO I = 1, 3 + DESCRN(I:I)=DESCRA(I:I) + ENDDO + IP1(1)=0 + IP2(1)=0 + + DO I=1,M ! zeroing AN + DO J=1,IAN2(1) + AN(M*(J-1)+I)=0.D0 + ENDDO + ENDDO + + IF(TRANS.EQ.'N') THEN +C +C Input matrix need not be permuted +C + K=0 + DO J=M,MAXND ! main & upper diagonals + IF(IAUX(J).EQ.1) THEN + K=K+1 + IAN1(K)=J-M + ENDIF + ENDDO + DO J=M-1,1,-1 ! lower diagonals + IF(IAUX(J).EQ.1) THEN + K=K+1 + IAN1(K)=J-M + ENDIF + ENDDO + + DO I=1,M ! build AN (nonzeros only) + DO J=IA2(I),IA2(I+1)-1 + DO K=1,IAN2(1) + IF((IA1(J)-I).EQ.IAN1(K))THEN + AN(M*(K-1)+I)=A(J) + GOTO 10 + ENDIF + ENDDO + 10 ENDDO + ENDDO + + ELSE +C +C Input matrix has to be permuted (square matrix only) +C + K=0 + DO J=M,1,-1 ! main & upper diagonals + IF(IAUX(J).EQ.1) THEN + K=K+1 + IAN1(K)=M-J + ENDIF + ENDDO + DO J=M+1,MAXND ! lower diagonals + IF(IAUX(J).EQ.1) THEN + K=K+1 + IAN1(K)=M-J + ENDIF + ENDDO + + DO I=1,M ! build AN (nonzeros only) + DO J=IA2(I),IA2(I+1)-1 + DO K=1,IAN2(1) + IF((I-IA1(J)).EQ.IAN1(K))THEN + AN(M*(K-1)+IA1(J))=A(J) + GOTO 20 + ENDIF + ENDDO + 20 ENDDO + ENDDO + + ENDIF + + RETURN + END + diff --git a/src/serial/dp/dcrel.f b/src/serial/dp/dcrel.f new file mode 100644 index 00000000..2a9091cb --- /dev/null +++ b/src/serial/dp/dcrel.f @@ -0,0 +1,150 @@ +C SUBROUTINE DCREL(TRANS,M,N,DESCRA,A,IA1,IA2,IP1,DESCRN, +C AN,IAN1,IAN2,IP2,LAN,LIAN1,LIAN2, +C IAUX,LIAUX,IERRV) +C +C Purpose: CSR to ELL format conversion +C ======= +C +C Parameter: +C ========= +C +C ... +C IAN2 - Vector: first element is max number of columns in matrices +C ARN,IAN1, elements to M+1 are column index of diagonal +C in ARN,IAN1 (in future releases) +C ... +C +C + SUBROUTINE DCREL(TRANS,M,N,DESCRA,A,IA1,IA2,IP1,DESCRN, + * AN,IAN1,IAN2,IP2,LAN,LIAN1,LIAN2, + * IAUX,LIAUX,IERRV) + IMPLICIT NONE +C +C .. Scalar Arguments .. + INTEGER LAN, LIAUX, LIAN1, LIAN2, M, N + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION A(*), AN(*) + INTEGER IA1(*), IA2(*), IAN1(*), IAN2(*), IP1(*), IP2(*), + * IAUX(LIAUX), IERRV(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER I, J, LWORKR +C .. External Subroutines .. + EXTERNAL XSPERR +C .. Executable Statements .. +C + +C +C Check for argument errors +C + IF (TRANS.NE.'T' .AND. TRANS.NE.'N') THEN + CALL XSPERR('TRANS ',ICHAR(TRANS),1,'DCREL',IERRV) + ENDIF + IF (M.LE.0) THEN + CALL XSPERR('MATDIM ',M,2,'DCREL',IERRV) + ENDIF + IF (N.LE.0) THEN + CALL XSPERR('MATDIM ',N,3,'DCREL',IERRV) + ENDIF + IF(LIAN2.LT.1) THEN + LIAN2 = 1 + CALL XSPERR('MATST ',LIAN2,16,'DCREL',IERRV) + ENDIF + IF (TRANS.EQ.'N') THEN + LWORKR = 0 + ELSE IF (TRANS.EQ.'T') THEN + LWORKR = N + ENDIF + IF (LIAUX.LT.LWORKR) THEN + CALL XSPERR('LWORK ',LIAUX,18,'DCREL',IERRV) + LIAUX = LWORKR + ENDIF + IF (IERRV(1).NE.0) RETURN + + + descrn(1:3) = descra(1:3) + IP1(1)=0 + IP2(1)=0 + + IF(TRANS.EQ.'N') THEN +C +C Input matrix need not be permuted +C + IAN2(1)=IA2(2)-IA2(1) + DO I = 2, M + IAN2(1) = MAX0(IAN2(1),IA2(I+1)-IA2(I)) + ENDDO + + IF (LAN.LT.(M*IAN2(1))) THEN + CALL XSPERR('MATST ',LAN,14,'DCREL',IERRV) + LAN = M * IAN2(1) + ENDIF + IF (LIAN1.LT.(M*IAN2(1))) THEN + CALL XSPERR('MATST ',LIAN1,15,'DCREL',IERRV) + LIAN1 = M * IAN2(1) + ENDIF + IF (IERRV(1).NE.0) RETURN + + DO I=1,M + DO J=IA2(I),IA2(I+1)-1 + AN(M*(J-IA2(I))+I)=A(J) + IAN1(M*(J-IA2(I))+I)=IA1(J) + ENDDO + DO J=IA2(I+1)-IA2(I)+1,IAN2(1) + AN(M*(J-1)+I)=0.D0 + IAN1(M*(J-1)+I)=IAN2((J-2)*M+I) + ENDDO + ENDDO + + ELSE +C +C Input matrix has to be permuted +C + + DO J=1,N + IAUX(I)=0 + ENDDO + DO I=1,M + DO J=IA2(I),IA2(I+1)-1 + IAUX(IA1(J))=IAUX(IA1(J))+1 + ENDDO + ENDDO + IAN2(1)=IAUX(1) + DO I = 2, M + IAN2(1) = MAX0(IAN2(1),IAUX(I)) + ENDDO + + IF (LAN.LT.(N*IAN2(1))) THEN + CALL XSPERR('MATST ',LAN,14,'DCREL',IERRV) + LAN = N * IAN2(1) + ENDIF + IF (LIAN1.LT.(N*IAN2(1))) THEN + CALL XSPERR('MATST ',LIAN1,15,'DCREL',IERRV) + LIAN1 = N * IAN2(1) + ENDIF + IF (IERRV(1).NE.0) RETURN + + DO J=1,N + IAUX(I)=0 + ENDDO + DO I=1,M + DO J=IA2(I),IA2(I+1)-1 + IAUX(IA1(J))=IAUX(IA1(J))+1 + AN (N*(IAUX(IA1(J)))+IA1(J))=A(J) + IAN1(N*(IAUX(IA1(J)))+IA1(J))=I + ENDDO + ENDDO + DO I=1,N + DO J=IAUX(I)+1,IAN2(I) + AN (N*(J-1)+I)=0.D0 + IAN1(N*(J-1)+I)=IAN1(N*IAUX(I)+I) + ENDDO + ENDDO + + ENDIF + + + RETURN + END + diff --git a/src/serial/dp/dcrinco.f b/src/serial/dp/dcrinco.f new file mode 100644 index 00000000..a07e766f --- /dev/null +++ b/src/serial/dp/dcrinco.f @@ -0,0 +1,91 @@ + SUBROUTINE DCRINCO(M,N,DESCRA,A,IA1,IA2, + + INFOA,IA,JA,LATOT,LIA1TOT,LIA2TOT, + + DESCRH,H,IH1,IH2,INFOH,IH,JH,WORK,LWORK,IERRV) + IMPLICIT NONE + INCLUDE 'sparker.fh' +C .. Scalar Arguments .. + INTEGER LWORK, M, N + INTEGER LATOT,LIA1TOT,LIA2TOT,IA,JA,IH,JH +C .. Array Arguments .. + DOUBLE PRECISION A(*), H(*), WORK(LWORK) + INTEGER IA1(*), IA2(*), IH1(*), IH2(*), + + INFOA(*), INFOH(*), IERRV(*) + CHARACTER DESCRA*11, DESCRH*11 + +C .. Local scalars .. + INTEGER I, J, NZA, nzh + logical debug + parameter (debug=.false.) + + + IERRV(1) = 0 + nza = infoa(nnz_) + nzh = ih2(ih+m)-ih2(ih) + + if ((nza+nzh).le.min(latot,lia1tot,lia2tot)) then +C +C In this case we are (hopefully) safe +C + DO I = IH, IH+M-1 + if (debug) write(0,*) 'DCRINCO: loop ',i,ih2(i),ih2(i+1) + DO J = IH2(I), IH2(I+1)-1 + IF ((IH1(J).GE.JH).AND.(IH1(J).LT.JH+N)) THEN +C If current element belongs to submatrix to insert + nza = nza +1 + A(nza) = H(J) + IA1(nza) = I+IA-IH + IA2(nza) = IH1(J)+JA-JH + ELSE + if (debug) then + write(*,*) 'DCRINCO: out of range',jh,ih1(j),jh+n + endif + ENDIF + ENDDO + ENDDO + + else +C +C Slow but safe +C + if (debug) write(0,*) 'DCRINCO: ',m,ih,jh,infoa(nnz_) +C Insert Element in COO Format + DO I = IH, IH+M-1 + if (debug) write(0,*) 'DCRINCO: loop ',i,ih2(i),ih2(i+1) + DO J = IH2(I), IH2(I+1)-1 + IF ((IH1(J).GE.JH).AND.(IH1(J).LT.JH+N)) THEN +C If current element belongs to submatrix to insert + nza = nza +1 + IF ((nza.le.LATOT) .and.(nza.le.LIA1TOT) + + .and.(nza.le.LIA2TOT)) THEN + A(nza) = H(J) + IA1(nza) = I+IA-IH + IA2(nza) = IH1(J)+JA-JH + if (debug) then + write(*,*) 'DCRINCO: ',j,h(j),i+ia-ih,ih1(j)+ja-jh + endif + else + IF (nza.GT.LATOT) THEN + IERRV(1) = 10 + IERRV(2) = nza + ELSE IF (nza.GT.LIA1TOT) THEN + IERRV(1) = 20 + IERRV(2) = nza + ELSE IF (nza.GT.LIA2TOT) THEN + IERRV(1) = 30 + IERRV(2) = nza + ENDIF + RETURN + endif + ELSE + if (debug) then + write(*,*) 'DCRINCO: out of range',jh,ih1(j),jh+n + endif + ENDIF + ENDDO + ENDDO + + endif + + infoa(nnz_) = nza + return + END diff --git a/src/serial/dp/dcrjd.f b/src/serial/dp/dcrjd.f new file mode 100644 index 00000000..a343aead --- /dev/null +++ b/src/serial/dp/dcrjd.f @@ -0,0 +1,284 @@ + SUBROUTINE DCRJD(TRANS,M,N,UNITD,D,DESCRA,AR,IA1,IA2,INFO, + * IP1,DESCRN,ARN,IAN1,IAN2,INFON,IP2,LARN,LIAN1, + * LIAN2,AUX,LAUX,SIZE_REQ,IERROR) +C +C Purpose +C ======= +C +C DCRJD converts a CSR matrix into a Jagged Diagonal. +C +C +C Notes +C ===== +C +C Parameters +C ========== +C +C TRANS Whether the transpose should be converted. +C +C M,N Size of input matrix A +C +C UNITD Scaling by diagonal D: 'U'nit, 'L'eft, 'R'ight +C D(*) +C +C DESCRA Input matrix A. +C AR,IA1, +C IA2,INFO +C +C DESCRN Output matrix in JAD format +C ARN,IAN1 +C IAN2,INFON, IP1, IP2 +C + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C +C .. Scalar Arguments .. + INTEGER LARN, LAUX, LAUX2, LIAN1, LIAN2, M, N, + * SIZE_REQ, IERROR + CHARACTER TRANS,UNITD +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*), D(*), AUX(LAUX) + INTEGER IA1(*), IA2(*), INFO(*), IAN1(*), IAN2(*), + * INFON(*), IP1(*), IP2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER IOFF, ISTROW, NJA, NZ, PIA, + + PJA, PNG, K, MAX_NG, NG, IERROR, LJA, ERR_ACT + LOGICAL SCALE + logical debug + parameter (debug=.false.) + CHARACTER UPLO + INTEGER MAX_NNZERO +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5), IERRV(*) + +C .. External Subroutines .. + EXTERNAL DVTFG + EXTERNAL MAX_NNZERO +C .. Executable Statements .. +C + NAME = 'DCRJD\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (LAUX.LT.4) THEN + IERROR = 60 + INT_VAL(1) = 22 + INT_VAL(2) = 4 + INT_VAL(3) = LAUX + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IF (TRANS.EQ.'N') THEN +C + NJA = 3*M + SCALE = (UNITD.EQ.'L') ! meaningless + IOFF = 5 +C +C SET THE VALUES OF POINTERS TO VECTOR IAN2 AND AUX +C + PNG = IOFF + PIA = PNG + 1 + PJA = PIA + 3*(M+2) + + IF (DESCRA(1:1).EQ.'G') THEN + +C +C CHECK ON DIMENSION OF IAN2 AND AUX +C + MAX_NG = M/MINJDROWS+1 + + IF ((PIA+3*(MAX_NG+1).GT.LIAN2).OR.(M+1 .GT. LAUX)) THEN +C ... If I haven't sufficent memory to compute NG in IAN2 ... + IF (M+1+3*(MAX_NG+1)/DBLEINT_+1.GT.LAUX) THEN +C ... If I haven't sufficent memory to compute NG in AUX ... + IERROR = 60 + INT_VAL(1) = 22 + INT_VAL(2) = M+1+3*(MAX_NG+1)/DBLEINT_+1 + INT_VAL(3) = LAUX + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ELSE +C ... I have sufficent memory to compute NG in AUX ... + CALL DGBLOCK(M,IA2,IP1,AUX(M+2),NG, AUX, LAUX*2) + CALL CHECK_DIM(M,N,AUX(M+2),NG,IA2, + + NZ,LARN,LIAN1,LIAN2,IERRV) + IF (IERRV(1).NE.0) THEN + SIZE_REQ = MAX(IERRV(2),IERRV(3),IERRV(4)) + + GOTO 9998 + ENDIF + ENDIF + END IF + + NZ = IA2(M+1) - 1 +C +C ... Initialize Permutation Matrix ... +C + DO 10 K = 1, M + IP1(K) = K + 10 CONTINUE + + IP2(1) = 0 +c$$$ write(0,*) 'Calling DGBLOCK first' + CALL DGBLOCK(M,IA2,IP1,IAN2(PIA),IAN2(PNG), AUX, LAUX*2) + + PJA = PIA + 3*(IAN2(PNG)+1) +C +C CHECK FOR ARRAY DIMENSIONS +C + CALL CHECK_DIM(M,N,IAN2(PIA),IAN2(PNG),IA2, + + NZ,LARN,LIAN1,LIAN2,IERRV) + IF (IERRV(1) .NE.0) THEN + SIZE_REQ = MAX(IERRV(2),IERRV(3),IERRV(4)) + GOTO 9998 + ENDIF + + LJA = LIAN2-PJA +c$$$ write(0,*) 'Into DGINDEX: ',lja,pja,lian2 + CALL DGINDEX(M,IAN2(PNG),AR,IA1,IA2,ARN,IAN1,IAN2(PIA), + + IAN2(PJA), INFON, LARN,LIAN1, + + LJA,IP1, AUX, LAUX*2, SIZE_REQ,IERROR) + + IF (IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + DESCRN(1:1) = 'G' + DESCRN(2:2) = 'U' + DESCRN(3:3) = 'N' + + ELSE IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') THEN +C + ISTROW = 1 + NZ = 2*(IA2(M+1)-1) - M +C +C CHECK ON DIMENSION OF IAN1 AND ARN +C + IF (NZ .GT. LIAN1) THEN + IERROR = 60 + INT_VAL(1) = 19 + INT_VAL(2) = NZ + INT_VAL(3) = LAUX + LIAN1 = NZ + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + IF (NZ .GT. LARN) THEN + IERROR = 60 + INT_VAL(1) = 18 + INT_VAL(2) = NZ + INT_VAL(3) = LAUX + LIAN1 = NZ + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + DO 20 K = 1, M + IP2(K) = K + 20 CONTINUE + +c$$$ CALL DVSSG(M,IA1,IA2,IP2,IAN2(PNG),IP1,IP2,AUX(IWLEN), +c$$$ * AUX(IWORK1)) +c$$$ CALL DVSMR(M,AR,IA1,IA2,IAN2(PNG),AUX(IWLEN),IP1,IP2, +c$$$ * IAN2(PIA),IAN2(PJA),IAN1,ARN,AUX(IWORK1), +c$$$ * AUX(IWORK2),NJA,IER,SCALE) +C + ELSE IF (DESCRA(1:1).EQ.'T') THEN +C +C Only unit diagonal so far for triangular matrices. +C + + + IF (DESCRA(3:3).NE.'U') THEN + IERROR=3022 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + UPLO = DESCRA(2:2) + NZ = IA2(M+1) - 1 +C +C ...Compute levels... +C Each level correspond to a block +C IAN1 is used as a work area + + CALL DVTFG(UPLO,M,IA1,IA2,IAN2(PNG),IP2,IP1,IAN1, + + AUX,AUX(M+1),AUX(2*(M+1))) + +C Generate IA(1,*) + DO K = 1, IAN2(PNG)+1 + IAN2(PIA+3*(K-1)) = IAN1(K) + ENDDO + + CALL GEN_BLOCK(M,IAN2(PNG),IAN2(PIA),AUX) + + PJA = PIA + 3*(IAN2(PNG)+1) + +C +C CHECK FOR ARRAY DIMENSIONS +C + + CALL CHECK_DIM(M,N,IAN2(PIA),IAN2(PNG),IA2, + + NZ,LARN,LIAN1,LIAN2,IERRV) + + IF (IERRV(1).NE.0) THEN + size_req = max(ierrv(2),ierrv(3),ierrv(4)) +c$$$ write(0,*) "error 2",ierrv(1) + GOTO 9998 + endif + LJA = LIAN2-PJA + + CALL DGIND_TRI(M,IAN2(PNG),AR,IA1,IA2,ARN,IAN1,IAN2(PIA), + + IAN2(PJA),LARN,LIAN1,LJA,IP1,AUX, LAUX*2, IERROR) + + IF (IERROR.NE.0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + DESCRN(1:1) = 'T' + DESCRN(2:2) = DESCRA(2:2) + DESCRN(3:3) = DESCRA(3:3) + + END IF +C +C SET THE OUTPUT PARAMETER +C + IAN2(1) = PNG + IAN2(2) = PIA + IAN2(3) = PJA + LARN = NZ + LIAN1 = NZ + LIAN2 = 3*M + 10 + LAUX2 = 4*M + 2 +C + ELSE IF (TRANS.NE.'N') THEN +C +C TO BE DONE +C + IERROR = 3021 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + 9998 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dcsrp1.f b/src/serial/dp/dcsrp1.f new file mode 100644 index 00000000..72c16304 --- /dev/null +++ b/src/serial/dp/dcsrp1.f @@ -0,0 +1,163 @@ +C +C Purpose +C ======= +C +C Performing column permutation of a sparse matrix. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the column permutation of matrix A +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C Work area. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK. +C LWORK must be greater than zero. +C On exit LWORK is the maximum between the initial value and +C the minimum value satisfying DSPRP memory requirements. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR = 4 error on dimension of vector WORK +C IERROR = 32 unknown flag TRANS +C IERROR = 64 LWORK <= 0 +C IERROR = 128 this data structure not yet considered +C +C Notes +C ===== +C It is not possible to call this subroutine with LWORK=0 to get +C the minimal value for LWORK. This functionality needs a better +C connection with DxxxMM +C +C + SUBROUTINE DCSRP1(TRANS,M,N,DESCRA,JA,IA, + + P,WORK,IWORK,LWORK,IERROR) + IMPLICIT NONE + INCLUDE 'sparker.fh' +C .. Scalar Arguments .. + INTEGER LWORK,M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION WORK(*) + INTEGER JA(*), IA(*), P(*), IWORK(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER I, J +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C .. Intrinsic Functions .. + INTRINSIC DBLE +C +C .. Executable Statements .. +C + NAME = 'DCSRP1\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF(TRANS.EQ.'N') THEN + DO 30 I=1,M + DO 10 J=IA(I),IA(I+1)-1 + JA(J) = P(JA(J)) + 10 CONTINUE + 30 CONTINUE + WORK(1) = 0.D0 + ELSE IF(TRANS.EQ.'T') THEN +C +C LWORK refers here to INTEGER IWORK (alias for WORK) +C + IF(LWORK.LT.M) THEN + IERROR = 60 + INT_VAL(1) = 18 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Transpose permutation matrix +C + DO 20 I=1,N + IWORK(P(I)) = I + 20 CONTINUE +C +C Permute columns +C + DO 50 I=1,M + DO 40 J=IA(I),IA(I+1)-1 + JA(J) = IWORK(JA(J)) + 40 CONTINUE + 50 CONTINUE +C +C WORK(1) refers here to a value for a DOUBLE PRECISION WORK +C + WORK(1) = DBLE((M+1)/DBLEINT_) + ENDIF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dcsrrp.f b/src/serial/dp/dcsrrp.f new file mode 100644 index 00000000..0cbd0e5f --- /dev/null +++ b/src/serial/dp/dcsrrp.f @@ -0,0 +1,138 @@ +C +C Purpose +C ======= +C +C Performing column permutation of a sparse matrix. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the column permutation of matrix A +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C Work area. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK. +C LWORK must be greater than zero. +C On exit LWORK is the maximum between the initial value and +C the minimum value satisfying DSPRP memory requirements. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR = 4 error on dimension of vector WORK +C IERROR = 32 unknown flag TRANS +C IERROR = 64 LWORK <= 0 +C IERROR = 128 this data structure not yet considered +C +C Notes +C ===== +C It is not possible to call this subroutine with LWORK=0 to get +C the minimal value for LWORK. This functionality needs a better +C connection with DxxxMM +C +C + SUBROUTINE DCSRRP(TRANS,M,N,DESCRA,JA,IA, + + P,WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LWORK, M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION WORK(*) + INTEGER JA(*), IA(*), P(*) + CHARACTER DESCRA*11 +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C .. External Subroutines .. + EXTERNAL XERBLA +C +C .. Executable Statements .. +C + NAME = 'DCSRRP\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (DESCRA(1:1).EQ.'S' .OR. DESCRA(1:1).EQ.'T') THEN + IERROR=3023 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + endif + CALL DCSRP1(TRANS,M,N,DESCRA,JA,IA,P,WORK,WORK,LWORK*2,IERROR) + IF(IERROR.NE.0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/dgblock.f b/src/serial/dp/dgblock.f new file mode 100644 index 00000000..e9e9fa35 --- /dev/null +++ b/src/serial/dp/dgblock.f @@ -0,0 +1,63 @@ + SUBROUTINE DGBLOCK(M,IA2,IPERM,IA,N_BLOCK,WORK,LWORK) + IMPLICIT NONE + +C ...Scalar arguments... + + INTEGER M, N_BLOCK, LWORK + +C ...Array arguments... + + INTEGER IA2(*), IPERM(*), IA(3,*), WORK(*) + +C ...Local scalars... + + INTEGER I, SWAP, KK, LP, IRET +C Compute number of nnzero elements per row + + IPERM(1) = 0 + + DO I = 1, M + WORK(I) = - IA2(I+1) + IA2(I) + ENDDO + +C Sorting Array work +C ........................ + + CALL MRGSRT(M,WORK,WORK(M+1),IRET) + IF (IRET.EQ.0) THEN +C Construct IPERM Vector + LP = WORK(M+1) + + DO I = 1, M + IPERM(LP) = I + LP = WORK(M+1+LP) + ENDDO + + LP = WORK(M+1) + KK = 1 + DO WHILE (.NOT.((LP.EQ.0).OR.(KK.GT.M))) + DO WHILE (LP.LT.KK) + LP = WORK(M+1+LP) + ENDDO +C Swap values of array work + SWAP = WORK(KK) + WORK(KK) = WORK(LP) + WORK(LP) = SWAP + +C Swap values of index array work(m+1) + SWAP = WORK(M+1+LP) + WORK(M+1+LP) = WORK(M+1+KK) + WORK(M+1+KK) = LP + + LP = SWAP + KK = KK+1 + ENDDO + + ENDIF +C Partitioning Matrix in blocks of rows + CALL PARTITION(M, WORK, IA, N_BLOCK) + + END + + + diff --git a/src/serial/dp/dgind_tri.f b/src/serial/dp/dgind_tri.f new file mode 100644 index 00000000..59bd8c06 --- /dev/null +++ b/src/serial/dp/dgind_tri.f @@ -0,0 +1,206 @@ + SUBROUTINE DGIND_TRI(M,N_BLOCKS,A,IA1,IA2,ARN,KA,IA,JA, + + LARN,LKA,LJA,IPERM,WORK, LWORK, IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C ... Scalar arguments ... + + INTEGER M, LWORK,N_BLOCKS,LARN,LKA,LJA, IERROR + +C ... Array arguments ... + + DOUBLE PRECISION A(*), ARN(*), WORK(*) + INTEGER IA1(*), IA2(*), KA(*), + + IA(3,*), IPERM(M), JA(*) + +C .... Local scalars ... + INTEGER I, J, BLOCK, ROW, COL, POINT_AR, POINT_JA, + + DIM_BLOCK, LIMIT + LOGICAL CSR +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + + NAME = 'DGIND_TRI\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + POINT_AR = 1 + POINT_JA = 0 + IERROR = 0 + + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 10 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + +C .... Invert Permutation Matrix... + IF (IPERM(1).NE.0) THEN + DO I = 1, M + WORK(IPERM(I)) = I + ENDDO + ENDIF + + DO BLOCK = 1, N_BLOCKS + COL = 1 + DIM_BLOCK = IA(1,BLOCK+1)-IA(1,BLOCK) + LIMIT = INT(DIM_BLOCK*PERCENT) + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 12 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(2,BLOCK) = POINT_JA + JA(POINT_JA) = POINT_AR + CSR = .FALSE. + + IF (DIM_BLOCK.NE.0) THEN +C ... If current block is not empty ... +C ... For each Column belonging to Block ... + DO WHILE(.TRUE.) +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... If the current row is too short ... + IF (IA2(ROW)+COL-1.GE.IA2(ROW+1)) THEN +C ... Switch to CSR representation ... + IF (I.LE.IA(1,BLOCK)+LIMIT) THEN + CSR=.TRUE. + POINT_AR = POINT_AR - I + IA(1,BLOCK) + GOTO 998 + ELSE + ARN(POINT_AR) = 0.D0 +C +C The following statement assumes that we never get here with POINT_AR=1 +C + KA (POINT_AR) = KA(POINT_AR-1) + + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) + + THEN + IERROR = 60 + INT_VAL(1) = 10 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ELSE + ARN(POINT_AR) = A(IA2(ROW)+COL-1) + KA (POINT_AR) = IPERM(IA1(IA2(ROW)+COL-1)) + + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 10 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ENDDO + + IF (CSR) GOTO 998 + + IF (LJA.LT.POINT_JA+COL) THEN + IERROR = 60 + INT_VAL(1) = 12 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + JA(POINT_JA+COL) = POINT_AR + COL = COL+1 + ENDDO + 998 CONTINUE + + ENDIF + + POINT_JA = POINT_JA+COL-1 + + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 12 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(3,BLOCK) = POINT_JA + +C ... Start CSR Format ... + +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... For each nnzero elements belonging to current row ... + DO J = IA2(ROW)+COL-1, IA2(ROW+1)-1 + ARN(POINT_AR) = A(J) + KA (POINT_AR) = IPERM(IA1(J)) + + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 10 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDDO + + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 12 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + JA(POINT_JA) = POINT_AR + ENDDO + ENDDO + + IA(2,N_BLOCKS+1) = POINT_JA + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + diff --git a/src/serial/dp/dgindex.f b/src/serial/dp/dgindex.f new file mode 100644 index 00000000..8c6637e7 --- /dev/null +++ b/src/serial/dp/dgindex.f @@ -0,0 +1,411 @@ + SUBROUTINE DGINDEX(M,N_BLOCKS,A,IA1,IA2,ARN,KA,IA,JA, INFON, + + LARN,LKA,LJA,IPERM,WORK, LWORK, SIZE_REQ, IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C ... Scalar arguments ... + INTEGER M, LWORK,N_BLOCKS,LARN,LKA,LJA, + + SIZE_REQ,IERROR + +C ... Array arguments ... + + DOUBLE PRECISION A(*), ARN(*) + INTEGER IA1(*), IA2(*), KA(*), + + IA(3,*), IPERM(*), JA(*), WORK(*),INFON(*) + +C .... Local scalars ... + INTEGER I, J, BLOCK, ROW, COL, POINT_AR, POINT_JA, IP1, + + IP2, IPX, NNZ, DIM_BLOCK, LIMIT, IPW,COUNT, IPC,CHECK_FLAG + LOGICAL CSR +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + + NAME = 'DGINDEX\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + POINT_AR = 1 + POINT_JA = 0 + CHECK_FLAG=IBITS(INFON(UPD_),1,2) + + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + NNZ = IA2(M + 1) - 1 + COUNT = 0 + +C .... Invert Permutation Matrix... + IF (IPERM(1).NE.0) THEN + DO I = 1, M + WORK(IPERM(I)) = I + ENDDO + ENDIF + + IF ( (LKA .GE.( SIZE_REQ )) + + .AND. (LWORK .GE. (M + NNZ+2))) THEN +C +C Prepare for smart regeneration +C + + IPW = M + 2 + IP1 = (LKA-IREG_FLGS-2)/2 + IP2 = IP1+IREG_FLGS + IPC = IP2 + NNZ + 1 + KA(IP1 + IPC_) = IPC + KA(IP1+IP2_) = IP2 + INFON(UPD_PNT_) = IP1 + KA(IP1+IFLAG_) = CHECK_FLAG + KA(IP1+NNZT_) = NNZ + KA(IP1+NNZ_) = 0 + KA(IP1+ICHK_) = NNZ+CHECK_FLAG + I = M+2 + IPX = IA2(I+IP2_) + +C Invert permutation for smart regeneration + + DO I = 1, NNZ + WORK(IPW + IA2(IPX + I -1) - 1) = I + ENDDO + +C Construct JAD matrix... + + DO BLOCK = 1, N_BLOCKS + COL = 1 + DIM_BLOCK = IA(1,BLOCK+1)-IA(1,BLOCK) +c$$$ write(0,*) 'DGINDEX: BLOCK LOOP ',block,n_blocks,dim_block + if (dim_block .gt. maxjdrows) then + write(0,*) 'Wrong value for dim_block',block, + + IA(1,BLOCK+1),IA(1,BLOCK) + return + endif + LIMIT = INT(DIM_BLOCK*PERCENT) + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(2,BLOCK) = POINT_JA + JA(POINT_JA) = POINT_AR + CSR = .FALSE. + + IF (DIM_BLOCK.NE.0) THEN +C ... If current block is not empty ... +C ... For each Column belonging to Block ... + DO WHILE(.TRUE.) +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... If the current row is too short ... + IF (IA2(ROW)+COL-1.GE.IA2(ROW+1)) THEN +C ... Switch to CSR representation ... + IF (I.LE.IA(1,BLOCK)+LIMIT) THEN + CSR=.TRUE. + POINT_AR = POINT_AR - I + IA(1,BLOCK) + GOTO 998 + ELSE + + COUNT = COUNT + 1 + ARN(POINT_AR) = 0.D0 + KA(POINT_AR) = KA(POINT_AR-1) + IF(POINT_AR.LT.IP1) THEN + KA(IPC + COUNT -1) = POINT_AR + ENDIF +C +C The following statement assumes that we never get here with POINT_AR=1 +C + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) + + THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ELSE + ARN(POINT_AR) = A(IA2(ROW)+COL-1) + KA(POINT_AR) = IA1(IA2(ROW)+COL-1) + IF(POINT_AR.LT.IP1) THEN + KA(IP2 + WORK(IPW + + + IA2(ROW) +COL -1-1)-1) = POINT_AR + ENDIF + + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) + + THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ENDDO + + IF (CSR) GOTO 998 + + IF (LJA.LT.POINT_JA+COL) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + IF(IERROR.EQ.0) THEN + JA(POINT_JA+COL) = POINT_AR + ENDIF + COL = COL+1 + ENDDO + + ENDIF + 998 CONTINUE + + POINT_JA = POINT_JA+COL-1 + + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(3,BLOCK) = POINT_JA + +C ... Start CSR Format ... + +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... For each nnzero elements belonging to current row ... + DO J = IA2(ROW)+COL-1, IA2(ROW+1)-1 + + ARN(POINT_AR) = A(J) + KA (POINT_AR) = IA1(J) + IF (POINT_AR.LT.IP1) THEN + KA(IP2 + WORK(IPW + J-1)-1) = POINT_AR + ENDIF + + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDDO + + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + JA(POINT_JA) = POINT_AR + ENDDO + ENDDO + + + ELSE +c$$$ c write(*,*)'inizio a ciclare sui blocchi' + DO BLOCK = 1, N_BLOCKS + COL = 1 + DIM_BLOCK = IA(1,BLOCK+1)-IA(1,BLOCK) + LIMIT = INT(DIM_BLOCK*PERCENT) + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(2,BLOCK) = POINT_JA + JA(POINT_JA) = POINT_AR + CSR = .FALSE. + + IF (DIM_BLOCK.NE.0) THEN +C ... If current block is not empty ... +C ... For each Column belonging to Block ... + DO WHILE(.TRUE.) +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... If the current row is too short ... + IF (IA2(ROW)+COL-1.GE.IA2(ROW+1)) THEN +C ... Switch to CSR representation ... + IF (I.LE.IA(1,BLOCK)+LIMIT) THEN + CSR=.TRUE. + POINT_AR = POINT_AR - I + IA(1,BLOCK) + GOTO 999 + ELSE + COUNT= COUNT+1 + ARN(POINT_AR) = 0.D0 + KA (POINT_AR) = KA(POINT_AR-1) +C +C The following statement assumes that we never get here with POINT_AR=1 +C + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) + + THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ELSE + ARN(POINT_AR) = A(IA2(ROW)+COL-1) + KA (POINT_AR) = IA1(IA2(ROW)+COL-1) + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) + + THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ENDDO + + IF (CSR) GOTO 999 + + IF (LJA.LT.POINT_JA+COL) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + JA(POINT_JA+COL) = POINT_AR + COL = COL+1 + ENDDO + + ENDIF + 999 CONTINUE + + POINT_JA = POINT_JA+COL-1 + + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + IA(3,BLOCK) = POINT_JA + +C ... Start CSR Format ... + +C ... For each row belonging to the block BLOCK ... + DO I = IA(1,BLOCK), IA(1,BLOCK+1)-1 + IF (IPERM(1).EQ.0) THEN + ROW = I + ELSE + ROW = WORK(I) + ENDIF + +C ... For each nnzero elements belonging to current row ... + DO J = IA2(ROW)+COL-1, IA2(ROW+1)-1 + ARN(POINT_AR) = A(J) + KA (POINT_AR) = IA1(J) + POINT_AR = POINT_AR+1 + IF ((LARN.LT.POINT_AR).OR.(LKA.LT.POINT_AR)) THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = POINT_AR + INT_VAL(3) = LARN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDDO + + POINT_JA = POINT_JA+1 + IF (LJA.LT.POINT_JA) THEN + IERROR = 60 + INT_VAL(1) = 13 + INT_VAL(2) = POINT_JA + INT_VAL(3) = LJA + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + JA(POINT_JA) = POINT_AR + ENDDO + ENDDO + + + + + ENDIF + IA(2,N_BLOCKS+1) = POINT_JA + KA(IP1 + ZERO_) = COUNT + + IF(POINT_AR.GE.IP1) THEN + SIZE_REQ=NNZ+COUNT + ELSE + SIZE_REQ=0 + ENDIF + infon(1)=point_ar-1 + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + diff --git a/src/serial/dp/djadrp.f b/src/serial/dp/djadrp.f new file mode 100644 index 00000000..eec4c4aa --- /dev/null +++ b/src/serial/dp/djadrp.f @@ -0,0 +1,140 @@ +C +C Purpose +C ======= +C +C Performing column permutation of a sparse matrix in JAD format. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the column permutation of matrix A +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C Work area. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK. +C LWORK must be greater than zero. +C On exit LWORK is the maximum between the initial value and +C the minimum value satisfying DSPRP memory requirements. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR = 4 error on dimension of vector WORK +C IERROR = 32 unknown flag TRANS +C IERROR = 64 LWORK <= 0 +C IERROR = 128 this data structure not yet considered +C +C Notes +C ===== +C It is not possible to call this subroutine with LWORK=0 to get +C the minimal value for LWORK. This functionality needs a better +C connection with DxxxMM +C +C + SUBROUTINE DJADRP(TRANS,M,N,DESCRA,JA,IA, + + P,WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LWORK, M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION WORK(*) + INTEGER JA(*), IA(*), P(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER PIA, PJA, PNG, IOFF +C .. Intrinsic Functions .. + INTRINSIC DBLE +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C +C .. Executable Statements .. +C + NAME = 'DJADRP\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IOFF = 5 +C +C SET THE VALUES OF POINTERS TO VECTOR IAN2 AND AUX +C + PNG = IOFF + PIA = PNG + 1 + PJA = PIA + 3*(IA(PNG)+1) + + IF (DESCRA(1:1).EQ.'G') THEN + CALL DJADRP1(TRANS,M,N,DESCRA,IA(PNG), + + JA,IA(PIA),IA(PJA),P,WORK,LWORK*2) + ELSE + IERROR=3024 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/dp/djadrp1.f b/src/serial/dp/djadrp1.f new file mode 100644 index 00000000..a874d47b --- /dev/null +++ b/src/serial/dp/djadrp1.f @@ -0,0 +1,170 @@ +C +C Purpose +C ======= +C +C Performing column permutation of a sparse matrix in JAD format. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the column permutation of matrix A +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C IWORK - INTEGER array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSPRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR = 4 error on dimension of vector WORK +C IERROR = 32 unknown flag TRANS +C IERROR = 64 LWORK <= 0 +C IERROR = 128 this data structure not yet considered +C +C Notes +C ===== +C It is not possible to call this subroutine with LWORK=0 to get +C the minimal value for LWORK. This functionality needs a better +C connection with DxxxMM +C +C + SUBROUTINE DJADRP1(TRANS,M,N,DESCRA,NG,KA,IA,JA, + + P,IWORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LWORK,M, N, NG, IERROR + CHARACTER TRANS +C .. Array Arguments .. + INTEGER KA(*), JA(*), IA(3,*), P(*), IWORK(LWORK) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER I, K, IPG +C .. Intrinsic Functions .. + INTRINSIC DBLE + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) +C .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C +C .. Executable Statements .. +C + NAME = 'DJADRP\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF(TRANS.EQ.'N') THEN + IF (DEBUG) WRITE(0,*)'DJADRP1:',NG + DO IPG = 1, NG + DO K = IA(2,IPG), IA(3,IPG)-1 + DO I = JA(K), JA(K+1) - 1 + KA(I) = P(KA(I)) + ENDDO + ENDDO +C Permute CSR + + DO K = IA(3,IPG), IA(2,IPG+1)-1 + DO I = JA(K), JA(K+1) - 1 + KA(I) = P(KA(I)) + ENDDO + ENDDO + ENDDO + + IWORK(1) = 0 + ELSE IF(TRANS.EQ.'T') THEN +C +C LWORK refers here to INTEGER IWORK (alias for WORK) +C + IF(LWORK.LT.M) THEN + IERROR = 60 + INT_VAL(1) = 11 + INT_VAL(2) = M + INT_VAL(3) = LWORK + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Transpose permutation matrix +C + DO 20 I=1,N + IWORK(P(I)) = I + 20 CONTINUE +C +C Permute columns +C + DO IPG = 1, NG + DO K = IA(2,IPG), IA(3,IPG)-1 + DO I = JA(K), JA(K+1) - 1 + KA(I) = IWORK(KA(I)) + ENDDO + ENDDO +C Permute CSR + + DO K = IA(3,IPG), IA(2,IPG+1)-1 + DO I = JA(K), JA(K+1) - 1 + KA(I) = IWORK(KA(I)) + ENDDO + ENDDO + ENDDO +C +C WORK(1) refers here to a value for a DOUBLE PRECISION WORK +C + IWORK(1) = M+1 + ENDIF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + diff --git a/src/serial/dp/djdco.f b/src/serial/dp/djdco.f new file mode 100644 index 00000000..c4af5776 --- /dev/null +++ b/src/serial/dp/djdco.f @@ -0,0 +1,57 @@ + SUBROUTINE DJDCO(TRANS,M,N,DESCRA,AR,IA1,IA2,IPERM,INFO, + * IP1,DESCRN,ARN,IA1N,IA2N,INFON,IP2,LARN,LIA1N, + * LIA2N,AUX,LAUX,IERROR) + IMPLICIT NONE + INCLUDE 'sparker.fh' +C +C .. Scalar Arguments .. + INTEGER LARN, LAUX, LIA1N, LIA2N, M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*) + INTEGER AUX(0:LAUX-1),IPERM(*) + INTEGER IA1(*), IA2(*), INFO(*), IA1N(*), + * IA2N(*), INFON(*), IP1(*), IP2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER PIA, PJA, PNG + logical debug + parameter (debug=.false.) +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + + NAME = 'DJDCO\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + PNG = IA2(1) + PIA = IA2(2) + PJA = IA2(3) + + if(debug) write(*,*) 'On entry to DJDCO: NNZ LAUX ', + + info(1),laux,larn,lia1n,lia2n + + CALL DJDCOX(TRANS,M,N,DESCRA,AR,IA2(PIA),IA2(PJA), + * IA1,IA2(PNG),IPERM, INFO, IP1,DESCRN,ARN,IA1N,IA2N,INFON, + * IP2,LARN,LIA1N, LIA2N,AUX,LAUX,IERROR) + IF(IERROR.NE.0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + diff --git a/src/serial/dp/djdcox.f b/src/serial/dp/djdcox.f new file mode 100755 index 00000000..9dd20ea9 --- /dev/null +++ b/src/serial/dp/djdcox.f @@ -0,0 +1,168 @@ + + +C Covert matrix from JAD format to COO Format +C + SUBROUTINE DJDCOX(TRANS,M,N,DESCRA,AR,IA,JA,KA,NG,IPERM,INFO, + * IP1,DESCRN,ARN,IA1N,IA2N,INFON,IP2,LARN,LIA1N, + * LIA2N,AUX,LAUX,IERROR) + + IMPLICIT NONE + INCLUDE 'sparker.fh' + +C +C .. Scalar Arguments .. + INTEGER NG, LARN, LAUX, LIA1N, LIA2N, M, N, IERROR + CHARACTER TRANS,UNITD +C .. Array Arguments .. + DOUBLE PRECISION AR(*), ARN(*) + INTEGER AUX(0:LAUX/2-1),IPERM(*) + INTEGER IA(3,*), JA(*), KA(*), INFO(*), IA1N(*), + * IA2N(*), INFON(*), IP1(*), IP2(*) + CHARACTER DESCRA*11, DESCRN*11 +C .. Local Scalars .. + INTEGER IPX, IPG, NNZ, K, ROW, + * I, J, NZL, IRET + LOGICAL SCALE + logical debug + parameter (debug=.false.) +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) + +C +C .. Executable Statements .. +C + NAME = 'DJDCOX\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (TRANS.EQ.'N') THEN +C SCALE = (UNITD.EQ.'L') ! meaningless + IP1(1) = 0 + IP2(1) = 0 + + IF (IPERM(1).NE.0) THEN + DO I = 1, M + AUX(IPERM(I)) = I + ENDDO + ENDIF + + NNZ = JA(IA(2,NG+1)-1 +1)-1 + + if (debug) then + write(0,*) 'On entry to DJDCOX: NNZ LAUX ', + + nnz,laux,larn,lia1n,lia2n + endif + IF (LAUX.LT.NNZ+2) THEN + IERROR = 60 + INT_VAL(1) = 23 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LARN.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 19 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA1N.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 20 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ELSE IF (LIA2N.LT.NNZ) THEN + IERROR = 60 + INT_VAL(1) = 21 + INT_VAL(2) = NNZ+2 + INT_VAL(3) = LAUX + ENDIF + + IF (DESCRA(1:1).EQ.'G') THEN + + DO 200 IPG = 1, NG + DO 50 K = IA(2,IPG), IA(3,IPG)-1 + IPX = IA(1,IPG) + DO 40 I = JA(K), JA(K+1) - 1 + ARN(I) = AR(I) + IA1N(I) = AUX(IPX) + IA2N(I) = KA(I) + IPX = IPX + 1 + 40 CONTINUE + 50 CONTINUE + + IPX = IA(1,IPG) + DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 + DO 60 I = JA(K), JA(K+1) - 1 + ARN(I) = AR(I) + IA1N(I) = AUX(IPX) + IA2N(I) = KA(I) + 60 CONTINUE + IPX = IPX + 1 + 70 CONTINUE + 200 CONTINUE + + + +C .... Order with key IA1N.... + CALL MRGSRT(NNZ,IA1N,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NNZ,ARN,IA1N,IA2N,AUX) + +C .... Order with key IA2N ... + I = 1 + J = I + DO WHILE (I.LE.NNZ) + DO WHILE ((IA1N(J).EQ.IA1N(I)).AND. + + (J.LE.NNZ)) + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,IA2N(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,ARN(I),IA1N(I),IA2N(I), + + AUX) + I = J + ENDDO + INFON(1)=nnz + + ELSE IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') THEN + + DO 20 K = 1, M + IP2(K) = K + 20 CONTINUE + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') THEN + + ELSE IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') THEN + + END IF +C + ELSE IF (TRANS.NE.'N') THEN +C +C TO DO +C + IERROR = 3021 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + + + + + + + + + diff --git a/src/serial/dp/dvtfg.f b/src/serial/dp/dvtfg.f new file mode 100644 index 00000000..c9832474 --- /dev/null +++ b/src/serial/dp/dvtfg.f @@ -0,0 +1,164 @@ +C +C Compute level numbers for the triangular matrix. +C + SUBROUTINE DVTFG (UPLO,M,JA,IA,NG,IPA,IPAT,KLEN,IWORK1,IWORK2, + * IWORK3) +C .. Scalar Arguments .. + INTEGER M, NG + CHARACTER UPLO +C .. Array Arguments .. + INTEGER IA(*), IPA(*), IPAT(*), IWORK3(*), JA(*), + * KLEN(*), IWORK2(*), IWORK1(*) +C .. Local Scalars .. + INTEGER I, J, L, L0, L1, LEV, NP, iret +C .. Intrinsic Functions .. + INTRINSIC MAX + logical debug + parameter (debug=.false.) +C .. Executable Statements .. +C + NG = 0 +C +C CHECK ON THE NUMBERS OF THE ELEMENTS OF THE MATRIX +C + IF ((IA(M+1)-1).EQ.0) THEN +C +C THE MATRIX HASN'T ELEMENTS +C THE OUTPUT PERMUTATIONS ARE POSED TO THE IDENTITY MATRIX +C + DO 20 I = 1, M + IPA(I) = I + IPAT(I) = I + KLEN(I) = 0 + 20 CONTINUE + ELSE +C +C COMPUTE LEVEL NUMBER FOR EACH ROWS +C +C IWORK1: AUXILIARY VECTOR WHICH CONTAINS +C LEVEL NUMBERS OF EACH ROW +C + DO 40 I = 1, M + IWORK1(I) = 0 + IWORK3(I) = 0 + 40 CONTINUE + IF (UPLO.EQ.'L') THEN +C +C LOWER TRIANGULAR SPARSE MATRIX +C + DO 80 I = 1, M + IWORK1(I) = 1 + DO 60 J = IA(I), IA(I+1) - 1 + IWORK1(I) = MAX(IWORK1(I),IWORK1(JA(J))+1) + 60 CONTINUE + 80 CONTINUE + ELSE IF (UPLO.EQ.'U') THEN +C +C UPPER TRIANGULAR SPARSE MATRIX +C + DO 120 I = M, 1, -1 + IWORK1(I) = 1 + DO 100 J = IA(I), IA(I+1) - 1 + IWORK1(I) = MAX(IWORK1(I),IWORK1(JA(J))+1) + 100 CONTINUE + 120 CONTINUE + END IF +C +C COUNT NUMBER OF ROWS IN EACH EQUIVALENCE GROUPS +C +C NOTE: GROUP = SET OF ROWS HAVING THE SAME LEVEL NUMBER +C +C IWORK3: AUXILIARY VECTOR WHICH CONTAINS +C THE NUMBER OF ROWS FOR EACH GROUPS +C + DO 140 I = 1, M + IWORK3(IWORK1(I)) = IWORK3(IWORK1(I)) + 1 + 140 CONTINUE +C +C SET UP IWORK2: +C IWORK2(I) POINTS TO THE BEGINNING OF I-TH GROUP +C + IWORK2(1) = 1 + DO 160 I = 2, M + 1 + IWORK2(I) = IWORK3(I-1) + IWORK2(I-1) + IF (IWORK2(I).EQ.M+1) THEN + NG = I - 1 + GO TO 180 + END IF + 160 CONTINUE + 180 CONTINUE +C +C NG : TOTAL NUMBER OF LEVELS +C IWORK3: VECTOR CONTAINING THE NUMBER OF THE ROWS SORTED BY +C EQUIVALENCE GROUPS. +C + DO 200 I = 1, M + IWORK3(IWORK2(IWORK1(I))) = I + IWORK2(IWORK1(I)) = IWORK2(IWORK1(I)) + 1 + 200 CONTINUE +C +C REGENERATE IWORK2: POINTER INTO IWORK3 FOR EQUIVALENCE GROUPS +C + DO 220 I = NG + 1, 2, -1 + IWORK2(I) = IWORK2(I-1) + 220 CONTINUE + IWORK2(1) = 1 +C +C IWORK1: ROWS LENGTH IN NEW ORDERING +C + DO 240 L = 1, M + IWORK1(L) = IA(IWORK3(L)) - IA(IWORK3(L)+1) + 240 CONTINUE +C +C SORT ROWS BY DECREASING NUMBER OF NONZERO ELEMENTS. +C +C IPA: VECTOR OF NUMBER OF ROWS SORTED BY EQUIVALENCE +C GROUPS AND BY ROW LENGTH +C IPAT: AUXILIARY VECTOR NEED TO THE SORTER ROUTINES +C + L1 = IWORK2(2) - IWORK2(1) + DO 260 L = 1, L1 + IPA(L) = IWORK3(L) + 260 CONTINUE + if (debug) write(0,*) 'DVTFG: Group ',1,':',(ipa(l),l=1,l1) + DO 360 LEV = 2, NG +C +C LOOP ON GROUPS +C L1: LENGTH OF CURRENT GROUP +C L0: POINTER TO IPA TO THE FIRST LOCATIONS RESERVED +C FOR CURRENT GROUP +C + L1 = IWORK2(LEV+1) - IWORK2(LEV) + L0 = IWORK2(LEV) - 1 + CALL MRGSRT(L1,IWORK1(IWORK2(LEV)),IPAT,IRET) + IF (IRET.EQ.0) THEN + NP = IPAT(1) + DO 280 L = 1, L1 + IPA(L0+L) = IWORK3(L0+NP) + NP = IPAT(1+NP) + 280 CONTINUE + ELSE +C +C VECTOR ALREADY SORTED. NO CHANGE IS NEED +C + DO 320 L = 1, L1 + IPA(L0+L) = IWORK3(L0+L) + 320 CONTINUE + ENDIF + if (debug) write(0,*) 'DVTFG: Group ',lev, + + ':',(ipa(l0+l),l=1,l1) + 360 CONTINUE +C +C IPAT = IPA-1 +C + DO 380 I = 1, M + IPAT(IPA(I)) = I + 380 CONTINUE +C DO 400 I = 1, NG +C KLEN(I) = IWORK2(I+1) - IWORK2(I) + DO 400 I = 1, NG+1 + KLEN(I) = IWORK2(I) + 400 CONTINUE + END IF + RETURN + END diff --git a/src/serial/dp/gen_block.f b/src/serial/dp/gen_block.f new file mode 100644 index 00000000..f9c50bfa --- /dev/null +++ b/src/serial/dp/gen_block.f @@ -0,0 +1,40 @@ + SUBROUTINE GEN_BLOCK(M,NG,IA,AUX) + IMPLICIT NONE + + INCLUDE 'sparker.fh' + INTEGER M, NG + INTEGER IA(3,*), AUX(*) + + INTEGER BLOCK, I, N_ROWS + + N_ROWS = IA(1,2) - IA(1,1) + I = 2 + BLOCK = 2 + AUX(1) = 1 + + DO WHILE(.TRUE.) + IF (N_ROWS.GT.MAXJDROWS) THEN + AUX(BLOCK) = AUX(BLOCK-1)+MAXJDROWS + N_ROWS = N_ROWS-MAXJDROWS + BLOCK = BLOCK+1 + ELSE IF (N_ROWS.GT.0) THEN + AUX(BLOCK) = AUX(BLOCK-1)+N_ROWS + N_ROWS = 0 + BLOCK = BLOCK+1 + ELSE IF (I.LE.NG) THEN + N_ROWS = IA(1,I+1) - IA(1,I) + I = I+1 + ELSE + GOTO 998 + ENDIF + ENDDO + 998 CONTINUE + +C ... Copy AUX in IA(1,*) + + NG = BLOCK - 2 + DO I = 1, NG+1 + IA(1,I) = AUX(I) + ENDDO + + END diff --git a/src/serial/dp/partition.f b/src/serial/dp/partition.f new file mode 100644 index 00000000..8faf7aff --- /dev/null +++ b/src/serial/dp/partition.f @@ -0,0 +1,60 @@ + SUBROUTINE PARTITION(M, WORK, IA, N_BLOCK) + IMPLICIT NONE + + INCLUDE 'sparker.fh' + +C ...Scalar arguments... + + INTEGER M, N_BLOCK + +C ...Array arguments... + + INTEGER IA(3,*), WORK(*) + +C ...Local scalars... + + INTEGER I, NNZ_ROW, N_ROWS, N_ROWS_EQ, BLOCK + + I = 1 + N_ROWS = 0 + BLOCK = 2 + + WORK(M+1) = 1 + + IA(1,1) = 1 + + DO WHILE(.TRUE.) + IF (N_ROWS.GT.MAXJDROWS) THEN + IA(1,BLOCK) = IA(1,BLOCK-1)+MAXJDROWS + N_ROWS = N_ROWS-MAXJDROWS + BLOCK = BLOCK+1 + ELSE IF (N_ROWS.GE.MINJDROWS) THEN + IA(1,BLOCK) = IA(1,BLOCK-1)+N_ROWS + N_ROWS = 0 + BLOCK = BLOCK+1 + ELSE IF (I.LE.M) THEN + N_ROWS_EQ = 0 + NNZ_ROW = -WORK(I) + DO WHILE (NNZ_ROW.EQ.-WORK(I)) + N_ROWS_EQ = N_ROWS_EQ+1 + I=I+1 + ENDDO + N_ROWS = N_ROWS + N_ROWS_EQ + ELSE IF (N_ROWS.NE.0) THEN ! (I.GT.M) + IA(1,BLOCK) = IA(1,BLOCK-1)+N_ROWS + BLOCK = BLOCK+1 + GOTO 998 + ELSE + GOTO 998 + ENDIF + ENDDO + 998 CONTINUE + + N_BLOCK = BLOCK - 2 + + if (ia(1,n_block+1)-1 .ne. m) then + write(0,*) 'PARTITION: Something wrong',m, + + n_block,ia(1,n_block+1),ia(1,n_block) + endif + END + diff --git a/src/serial/dp/reordvn.f b/src/serial/dp/reordvn.f new file mode 100644 index 00000000..86948cd2 --- /dev/null +++ b/src/serial/dp/reordvn.f @@ -0,0 +1,175 @@ + subroutine reordvn(nnz,ar,ia1,ia2,idx) + integer nnz + integer ia1(*),ia2(*),idx(0:*) + double precision ar(*) + integer lp, kk, swapia1, swapia2, lswap + double precision swapar + + LP = IDX(0) + KK = 1 + 500 CONTINUE + IF ((LP.EQ.0).OR.(KK.GT.NNZ)) GOTO 800 + 600 CONTINUE + IF (LP.GE.KK) GOTO 700 + LP = IDX(LP) + GOTO 600 + 700 CONTINUE +C ... Swap of vectors IA2, IA1, AR ... + SWAPIA2 = IA2(KK) + SWAPIA1 = IA1(KK) + SWAPAR = AR(KK) + IA2(KK) = IA2(LP) + IA1(KK) = IA1(LP) + AR(KK) = AR(LP) + IA2(LP) = SWAPIA2 + IA1(LP) = SWAPIA1 + AR(LP) = SWAPAR + LSWAP = IDX(LP) + IDX(LP) = IDX(KK) + IDX(KK) = LP + LP = LSWAP + KK = KK+1 + GOTO 500 + 800 CONTINUE + return + end + subroutine ireordv1(nnz,ia1,idx) + integer nnz + integer ia1(*),idx(0:*) + integer lp, kk, swapia1, lswap + + LP = IDX(0) + KK = 1 + 500 CONTINUE + IF ((LP.EQ.0).OR.(KK.GT.NNZ)) GOTO 800 + 600 CONTINUE + IF (LP.GE.KK) GOTO 700 + LP = IDX(LP) + GOTO 600 + 700 CONTINUE +C ... Swap of vectors IA2, IA1, AR ... + SWAPIA1 = IA1(KK) + IA1(KK) = IA1(LP) + IA1(LP) = SWAPIA1 + LSWAP = IDX(LP) + IDX(LP) = IDX(KK) + IDX(KK) = LP + LP = LSWAP + KK = KK+1 + GOTO 500 + 800 CONTINUE + return + end + subroutine reordvn3(nnz,ar,ia1,ia2,ia3,idx) + integer nnz + integer ia1(*),ia2(*),ia3(*),idx(0:*) + double precision ar(*) + integer lp, kk, swapia1, swapia2, swapia3,lswap + double precision swapar + + LP = IDX(0) + KK = 1 + 500 CONTINUE + IF ((LP.EQ.0).OR.(KK.GT.NNZ)) GOTO 800 + 600 CONTINUE + IF (LP.GE.KK) GOTO 700 + LP = IDX(LP) + GOTO 600 + 700 CONTINUE +C ... Swap of vectors IA2, IA1, AR ... + SWAPIA3 = IA3(KK) + SWAPIA2 = IA2(KK) + SWAPIA1 = IA1(KK) + SWAPAR = AR(KK) + IA3(KK) = IA3(LP) + IA2(KK) = IA2(LP) + IA1(KK) = IA1(LP) + AR(KK) = AR(LP) + IA3(LP) = SWAPIA3 + IA2(LP) = SWAPIA2 + IA1(LP) = SWAPIA1 + AR(LP) = SWAPAR + LSWAP = IDX(LP) + IDX(LP) = IDX(KK) + IDX(KK) = LP + LP = LSWAP + KK = KK+1 + GOTO 500 + 800 CONTINUE + return + end + + subroutine zreordvn(nnz,ar,ia1,ia2,idx) + integer nnz + integer ia1(*),ia2(*),idx(0:*) + complex*16 ar(*) + integer lp, kk, swapia1, swapia2, lswap + complex*16 swapar + + LP = IDX(0) + KK = 1 + 500 CONTINUE + IF ((LP.EQ.0).OR.(KK.GT.NNZ)) GOTO 800 + 600 CONTINUE + IF (LP.GE.KK) GOTO 700 + LP = IDX(LP) + GOTO 600 + 700 CONTINUE +C ... Swap of vectors IA2, IA1, AR ... + SWAPIA2 = IA2(KK) + SWAPIA1 = IA1(KK) + SWAPAR = AR(KK) + IA2(KK) = IA2(LP) + IA1(KK) = IA1(LP) + AR(KK) = AR(LP) + IA2(LP) = SWAPIA2 + IA1(LP) = SWAPIA1 + AR(LP) = SWAPAR + LSWAP = IDX(LP) + IDX(LP) = IDX(KK) + IDX(KK) = LP + LP = LSWAP + KK = KK+1 + GOTO 500 + 800 CONTINUE + return + end + subroutine zreordvn3(nnz,ar,ia1,ia2,ia3,idx) + integer nnz + integer ia1(*),ia2(*),ia3(*),idx(0:*) + complex*16 ar(*) + integer lp, kk, swapia1, swapia2, swapia3,lswap + complex*16 swapar + + LP = IDX(0) + KK = 1 + 500 CONTINUE + IF ((LP.EQ.0).OR.(KK.GT.NNZ)) GOTO 800 + 600 CONTINUE + IF (LP.GE.KK) GOTO 700 + LP = IDX(LP) + GOTO 600 + 700 CONTINUE +C ... Swap of vectors IA2, IA1, AR ... + SWAPIA3 = IA3(KK) + SWAPIA2 = IA2(KK) + SWAPIA1 = IA1(KK) + SWAPAR = AR(KK) + IA3(KK) = IA3(LP) + IA2(KK) = IA2(LP) + IA1(KK) = IA1(LP) + AR(KK) = AR(LP) + IA3(LP) = SWAPIA3 + IA2(LP) = SWAPIA2 + IA1(LP) = SWAPIA1 + AR(LP) = SWAPAR + LSWAP = IDX(LP) + IDX(LP) = IDX(KK) + IDX(KK) = LP + LP = LSWAP + KK = KK+1 + GOTO 500 + 800 CONTINUE + return + end + diff --git a/src/serial/f77/daxpby.f b/src/serial/f77/daxpby.f new file mode 100644 index 00000000..45d13359 --- /dev/null +++ b/src/serial/f77/daxpby.f @@ -0,0 +1,169 @@ + subroutine daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info) + double precision one, zero + parameter (one=1.d0,zero=0.d0) + integer n, m, lldx, lldy, info + double precision X(lldx,*), Y(lldy,*) + double precision alpha, beta + integer i, j + integer int_err(5) + double precision real_err(5) + character name*20 + name='daxpby' + + +C +C Error handling +C + info = 0 + if (m.lt.0) then + info=10 + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (n.lt.0) then + info=10 + int_err(1)=1 + int_err(2)=n + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (lldx.lt.max(1,m)) then + info=50 + int_err(1)=5 + int_err(2)=1 + int_err(3)=lldx + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (lldy.lt.max(1,m)) then + info=50 + int_err(1)=8 + int_err(2)=1 + int_err(3)=lldy + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (alpha.eq.zero) then + if (beta.eq.zero) then + do j=1, n + do i=1,m + y(i,j) = zero + enddo + enddo + else if (beta.eq.one) then +c$$$ +c$$$ Do nothing! +c$$$ + + else if (beta.eq.-one) then + do j=1,n + do i=1,m + y(i,j) = - y(i,j) + enddo + enddo + else + do j=1,n + do i=1,m + y(i,j) = beta*y(i,j) + enddo + enddo + endif + + else if (alpha.eq.one) then + + if (beta.eq.zero) then + do j=1,n + do i=1,m + y(i,j) = x(i,j) + enddo + enddo + else if (beta.eq.one) then + do j=1,n + do i=1,m + y(i,j) = x(i,j) + y(i,j) + enddo + enddo + + else if (beta.eq.-one) then + do j=1,n + do i=1,m + y(i,j) = x(i,j) - y(i,j) + enddo + enddo + else + do j=1,n + do i=1,m + y(i,j) = x(i,j) + beta*y(i,j) + enddo + enddo + endif + + else if (alpha.eq.-one) then + + if (beta.eq.zero) then + do j=1,n + do i=1,m + y(i,j) = -x(i,j) + enddo + enddo + else if (beta.eq.one) then + do j=1,n + do i=1,m + y(i,j) = -x(i,j) + y(i,j) + enddo + enddo + + else if (beta.eq.-one) then + do j=1,n + do i=1,m + y(i,j) = -x(i,j) - y(i,j) + enddo + enddo + else + do j=1,n + do i=1,m + y(i,j) = -x(i,j) + beta*y(i,j) + enddo + enddo + endif + + else + + if (beta.eq.zero) then + do j=1,n + do i=1,m + y(i,j) = alpha*x(i,j) + enddo + enddo + else if (beta.eq.one) then + do j=1,n + do i=1,m + y(i,j) = alpha*x(i,j) + y(i,j) + enddo + enddo + + else if (beta.eq.-one) then + do j=1,n + do i=1,m + y(i,j) = alpha*x(i,j) - y(i,j) + enddo + enddo + else + do j=1,n + do i=1,m + y(i,j) = alpha*x(i,j) + beta*y(i,j) + enddo + enddo + endif + + endif + + return + + 9999 continue + call fcpsb_serror() + return + + end diff --git a/src/serial/f77/dcsmm.f b/src/serial/f77/dcsmm.f new file mode 100644 index 00000000..3e9434a4 --- /dev/null +++ b/src/serial/f77/dcsmm.f @@ -0,0 +1,345 @@ +C SUBROUTINE DCSMM(TRANS,M,N,K,ALPHA,PL,FIDA,DESCRA,A,IA1,IA2, +C INFOA,PR,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C Purpose +C ======= +C +C Computing matrix-matrix product +C C <-- ALPHA PL A PR B + BETA C or +C C <-- ALPHA PL At PR B + BETA C +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies if the routine operates with matrix A +C or with the transpose of A as follows: +C TRANS = 'N' -> use matrix A +C TRANS = 'T' or 'C' -> use A' (transpose of matrix A) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A (A') and +C number of rows of matrix C +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix B +C and number of columns of matrix C. +C Unchanged on exit. +C +C K - INTEGER +C On entry: number of columns of matrix A (A') and +C number of rows of matrix B +C Unchanged on exit. +C +C ALPHA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C PL - INTEGER array of dimension (M) +C On entry PL specifies the row permutation of matrix A +C (PL(1) == 0 if no permutation). +C Unchanged on exit. +C +C FIDA - CHARACTER*5 +C On entry FIDA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRA - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRA describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of length 10. +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C PR - INTEGER array of dimension (K) +C On entry PR specifies the column permutation of matrix A +C (PR(1) == 0 if no permutation). +C Unchanged on exit. +C +C B - DOUBLE PRECISION matrix of dimension (LDB,*) +C On entry: dense matrix. +C Unchanged on exit. +C +C LDB - INTEGER +C On entry: leading dimension of B +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION matrix of dimension (LDC,*) +C On entry: dense matrix. +C On exit is updated with the matrix-matrix product. +C +C LDC - INTEGER +C On entry: leading dimension of C +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DCSMM memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C LWORK should be set as follows: +C LWORK = (LWORK for DxxxMM) + Pr*K*N + Pl*M*N +C where Pr (Pl) = 1 if right (left) permutation has to +C be performed, 0 otherwise. +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Local Variables +C =============== +C +C LWORKM - INTEGER +C Minimum work area dimension for DCSMM +C +C LWORKB - INTEGER +C Work area dimension for matrix B in subroutine DLPUPD +C +C LWORKC - INTEGER +C Work area dimension for matrix C in subroutine DLPUPD +C +C LWORKS - INTEGER +C Work area dimension for subroutine DSWMM +C +C P - INTEGER +C Pointer to work area +C +C LP - LOGICAL +C LP is true if left permutation is required +C +C RP - LOGICAL +C RP is true if right permutation is required +C +C Notes +C ===== +C Some tests have shown that it is more efficient to divide the +C sparse matrix-dense matrix multiplication step and the dense +C matrix permutation step, and it is more efficient to put +C together the left permutation and update (C <- xxx + BETA C) +C steps. So, the sequence of operations is: +C Right permutation DLPUPD +C Matrix-Matrix product DSWMM +C Left permutation and update DLPUPD +C In order to avoid useless memory transfer, the above scheme is +C simplified according to whether right and left permutation have to +C be performed. If left permutation is not required, the update step +C is performed in the sparse matrix-dense matrix multiplication kernel. +C +C It is not possible to call this subroutine with LWORK=0 to get # +C the minimal value for LWORK. This functionality needs a better # +C connection with DxxxMM # +C +C + SUBROUTINE DCSMM(TRANS,M,N,K,ALPHA,PL,FIDA,DESCRA,A,IA1,IA2, + & INFOA,PR,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N,K,LDB,LDC,LWORK, IERROR + CHARACTER TRANS + DOUBLE PRECISION ALPHA,BETA +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*),PL(*),PR(*) + CHARACTER DESCRA*11, FIDA*5 + DOUBLE PRECISION A(*),B(LDB,*),C(LDC,*),WORK(*) +C .. Local Scalars .. + INTEGER LWORKM, LWORKB, LWORKC, LWORKS, P + LOGICAL LP, RP +C .. Local Array.. + INTEGER INT_VAL(5) + CHARACTER*20 NAME + DOUBLE PRECISION REAL_VAL(5) + CHARACTER*30 STRINGS(2) +C .. Parameters .. + DOUBLE PRECISION ZERO + INTEGER IONE + PARAMETER (ZERO=0.D0,IONE=1) +C .. External Subroutines .. + EXTERNAL DSWMM, DLPUPD, DSCAL, XERBLA +C .. Intrinsic Functions .. + INTRINSIC DBLE, IDINT + +C .. Executable Statements .. +C +C Check for argument errors +C + NAME = 'DCSMM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (M.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 2 + INT_VAL(2) = M + ELSE IF (K.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 4 + INT_VAL(2) = K + ELSE IF (N.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 3 + INT_VAL(2) = N + ELSE IF (TRANS.NE.'T' .AND. TRANS.NE.'N' .AND. TRANS.NE.'C') THEN + IERROR = 40 + INT_VAL(1) = 1 + STRINGS(1) = TRANS//'\0' + ELSE IF (LDB.LT.K) THEN + IERROR = 50 + INT_VAL(1) = 15 + INT_VAL(2) = 4 + INT_VAL(3) = LDB + INT_VAL(4) = K + ELSE IF (LDC.LT.M) THEN + IERROR = 50 + INT_VAL(1) = 18 + INT_VAL(2) = 2 + INT_VAL(3) = LDC + INT_VAL(4) = M + ENDIF + +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + +C +C Inizializations +C + LP = PL(1).NE.0 + RP = PR(1).NE.0 + LWORKB = K*N + LWORKC = M*N + LWORKM = 0 + IF (RP) LWORKM = LWORKB + IF (LP) LWORKM = LWORKM + LWORKC + IF (LWORK.LT.LWORKM) THEN + IERROR = 60 + INT_VAL(1) = 20 + INT_VAL(2) = LWORKM + INT_VAL(3) = LWORK + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + LWORKS = LWORK - LWORKM + +C +C Check for M, N, K +C + IF(M.GT.0 .AND. N.GT.0 .AND. K.EQ.0) THEN +C +C Only C <-- BETA C required +C +C CALL DSCAL(M,BETA,C,IONE) + ELSE IF(M.LE.0 .OR. N.LE.0 .OR. K.LE.0) THEN + GOTO 9998 + ENDIF +C +C Switching on PR and PL +C + IF (LP .AND. RP) THEN +C +C Both right and left permutation required +C + P=LWORKB+1 + CALL DLPUPD(K,N,PR,B,LDB,ZERO,WORK,K) + CALL DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2,INFOA, + & WORK,K,ZERO,WORK(P),M,WORK(P+LWORKC),LWORKS,IERROR) + LWORKS = IDINT(WORK(P+LWORKC)) + IF(IERROR .NE. 0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + CALL DLPUPD(M,N,PL,WORK(P),M,BETA,C,LDC) + ELSE IF(.NOT.LP .AND. RP) THEN +C +C Only right permutation required +C + P=LWORKB+1 + CALL DLPUPD(K,N,PR,B,LDB,ZERO,WORK,K) + CALL DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2,INFOA, + & WORK,K,BETA,C,LDC,WORK(P),LWORKS,IERROR) + LWORKS = IDINT(WORK(P)) + IF(IERROR .NE. 0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ELSE IF(.NOT.RP .AND. LP) THEN +C +C Only left permutation required +C + P=LWORKC+1 + CALL DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2,INFOA, + & B,LDB,ZERO,WORK,M,WORK(P),LWORKS,IERROR) + LWORKS = IDINT(WORK(P)) + IF(IERROR .NE. 0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + CALL DLPUPD(M,N,PL,WORK,M,BETA,C,LDC) + ELSE IF(.NOT.RP .AND. .NOT.LP) THEN +C +C No permutations required +C + CALL DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2,INFOA, + & B,LDB,BETA,C,LDC,WORK,LWORKS,IERROR) + LWORKS = IDINT(WORK(1)) + IF(IERROR .NE. 0) THEN + IERROR=4011 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + 9998 CONTINUE +C +C Return minimum workarea dimension +C + LWORKM = LWORKM + LWORKS + WORK(1) = DBLE(LWORKM) + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dcsnmi.f b/src/serial/f77/dcsnmi.f new file mode 100644 index 00000000..cf042d7f --- /dev/null +++ b/src/serial/f77/dcsnmi.f @@ -0,0 +1,154 @@ +C DOUBLE PRECISION FUNCTION DCSNMI(TRANS,M,N,FIDA,DESCRA,A,IA1,IA2, & +C & INFOA,IERROR) +C Purpose +C ======= +C +C Computing matrix infinity norm +C nrmi <-- ||A||infty or +C nrmi <-- ||At||infty +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies if the routine operates with matrix A +C or with the transpose of A as follows: +C TRANS = 'N' -> use matrix A +C TRANS = 'T' or 'C' -> use A' (transpose of matrix A) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A (A') +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C FIDA - CHARACTER*5 +C On entry FIDA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRA - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRA describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of length 10. +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Notes +C ===== +C + DOUBLE PRECISION FUNCTION DCSNMI(TRANS,M,N,FIDA,DESCRA,A,IA1,IA2, + & INFOA,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*) + CHARACTER DESCRA*11, FIDA*5 + DOUBLE PRECISION A(*) +C .. Local Array.. + INTEGER INT_VAL(5) + DOUBLE PRECISION REAL_VAL(5) + CHARACTER*30 NAME, STRINGS(2) +C .. External Subroutines .. + DOUBLE PRECISION DCRNRMI, DJDNRMI, DCOONRMI + EXTERNAL DCRNRMI, DJDNRMI, DCOONRMI +C .. Intrinsic Functions .. + INTRINSIC DBLE, IDINT + + CHARACTER*20 NAME +C .. Executable Statements .. +C +C Check for argument errors +C + IERROR = 0 + NAME = 'DCSNMI\0' + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (M.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 2 + INT_VAL(2) = M + ELSE IF (N.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 3 + INT_VAL(2) = N + ELSE IF (TRANS.NE.'T' .AND. TRANS.NE.'N' .AND. TRANS.NE.'C') THEN + IERROR = 40 + INT_VAL(1) = 1 + STRINGS(1) = TRANS//'\0' + ENDIF + +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + +C +C Check for M, N, K +C + IF(M.LE.0 .OR. N.LE.0) THEN + DCSNMI = 0.D0 + GOTO 9999 + ENDIF + +C ... Compute infinity norm for matrix A ... + IF (FIDA(1:3).EQ.'CSR') THEN + DCSNMI = DCRNRMI(TRANS,M,N,DESCRA,A,IA1,IA2, + + INFOA,IERROR) + ELSE IF (FIDA(1:3).EQ.'JAD') THEN + DCSNMI = DJDNRMI(TRANS,M,N,DESCRA,A,IA1,IA2, + + INFOA,IERROR) + ELSE IF (FIDA(1:3).EQ.'COO') THEN + DCSNMI = DCOONRMI(TRANS,M,N,DESCRA,A,IA1,IA2, + + INFOA,IERROR) + ELSE +C +C This data structure not yet considered +C + IERROR = 3010 + ENDIF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dcsrp.f b/src/serial/f77/dcsrp.f new file mode 100644 index 00000000..f39aafc1 --- /dev/null +++ b/src/serial/f77/dcsrp.f @@ -0,0 +1,168 @@ +C SUBROUTINE DCSRP(TRANS,M,N,FIDA,DESCRA,IA1,IA2,INFOA, +C P,WORK,LWORK,IERROR) +C +C Purpose +C ======= +C +C Performing column permutation of a sparse matrix. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine will use +C matrix P or the transpose of P for the permutation as follows: +C TRANS = 'N' -> permute with matrix P +C TRANS = 'T' or 'C' -> permute the transpose of P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix A. +C Unchanged on exit. +C +C DESCRA - CHARACTER*5 array of DIMENSION (10) +C On entry DESCRA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C On exit contain integer information on permuted matrix. +C +C INFOA - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the column permutation of matrix A +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DCSRP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C Work area. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK. +C LWORK must be greater than zero. +C On exit LWORK is the maximum between the initial value and +C the minimum value satisfying DCSRP memory requirements. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR = 4 error on dimension of vector WORK +C IERROR = 32 unknown flag TRANS +C IERROR = 64 LWORK <= 0 +C IERROR = 128 this data structure not yet considered +C +C Notes +C ===== +C It is not possible to call this subroutine with LWORK=0 to get +C the minimal value for LWORK. This functionality needs a better +C connection with DxxxMM +C +C + SUBROUTINE DCSRP(TRANS,M,N,FIDA,DESCRA,IA1,IA2,INFOA, + + P,WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LWORK, M, N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION WORK(LWORK) + INTEGER IA1(*), IA2(*), INFOA(*), P(*), INT_VAL(5) + CHARACTER DESCRA*11, FIDA*5 +C .. External Subroutines .. + EXTERNAL DCSRRP, XERBLA + logical debug + parameter (debug=.false.) + + CHARACTER*20 NAME +C +C .. Executable Statements .. +C +C +C Check on M, N, TRANS +C + NAME = 'DCSRP\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (M.LE.0) THEN + IERROR = 1 + ELSE IF(N.LE.0) THEN + IERROR = 3 + ELSE IF(TRANS.NE.'N' .AND. TRANS.NE.'T' .AND. TRANS.NE.'C') THEN + IERROR = -1 + ENDIF +C +C Error handling +C + IF(IERROR.LT.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Check for M, N, P +C + IF(M.LE.0 .OR. N.LE.0 .OR. P(1).EQ.0) THEN + GOTO 9999 + ENDIF +C +C Switching on FIDA +C +c$$$ write(0,*) 'DCSRP FORMAT: ',fida + IF (FIDA(1:3).EQ.'CSR') THEN +C +C Permuting CSR structure +C + CALL DCSRRP(TRANS,M,N,DESCRA,IA1,IA2,P,WORK,LWORK) + ELSE IF (FIDA(1:3).EQ.'JAD') THEN + if (debug) write(0,*) 'Calling djadrp',m,p(1),lwork + CALL DJADRP(TRANS,M,N,DESCRA,IA1,IA2,P,WORK,LWORK) + ELSE +C +C This data structure not yet considered +C + IERROR = 4 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dcssm.f b/src/serial/f77/dcssm.f new file mode 100644 index 00000000..132afafe --- /dev/null +++ b/src/serial/f77/dcssm.f @@ -0,0 +1,349 @@ +C SUBROUTINE DCSSM(TRANS,M,N,ALPHA,UNITD,D,PL,FIDT,DESCRT,T,IT1,IT2, +C INFOT,PR,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C +C Purpose +C ======= +C +C Solving triangular systems of equations with multiple right-hand sides +C C <-- ALPHA PL D T-1 PR B + BETA C or +C C <-- ALPHA PL D T-t PR B + BETA C or +C C <-- ALPHA PL T-1 D PR B + BETA C or +C C <-- ALPHA PL T-t D PR B + BETA C +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine operates with +C matrix T or with the transpose of T as follows: +C TRANS = 'N' -> use matrix T +C TRANS = 'T' or 'C' -> use T' (transpose of matrix T) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows and columns of matrix Ty +C and number of rows of matrices B and C. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrices B and C +C (number of right-hand sides). +C Unchanged on exit. +C +C ALPHA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C UNITD - CHARACTER*1 +C On entry UNITD specifies whether the diagonal matrix is unit +C or whether row or column scaling has to be performed, as follows: +C UNITD = 'U' -> unit matrix (no scaling) +C UNITD = 'L' -> scale on the left (row scaling) +C UNITD = 'R' -> scale on the right (column scaling) +C UNITD = 'B' -> scale on the right and on the left +C with D^1/2 +C Unchanged on exit. +C +C D - DOUBLE PRECISION array of dimension (M) +C On entry D specifies the main diagonal of the matrix used +C for scaling. +C Unchanged on exit. +C +C PL - INTEGER array of dimension (M) +C On entry PL specifies the row permutation of matrix T +C (PL(1) == 0 if no permutation). +C Unchanged on exit. +C +C FIDT - CHARACTER*5 +C On entry FIDT defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRT - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRT describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C +C T - DOUBLE PRECISION array of DIMENSION (*) +C On entry T specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IT1 - INTEGER array of dimension (*) +C On entry IT1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IT2 - INTEGER array of dimension (*) +C On entry IT2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOT - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C PR - INTEGER array of dimension (M) +C On entry PR specifies the column permutation of matrix T +C (PR(1) == 0 if no permutation). +C Unchanged on exit. +C +C B - DOUBLE PRECISION array of dimension (LDB,*) +C On entry: matrix of right-hand sides +C Unchanged on exit. +C +C LDB - INTEGER +C On entry: leading dimension of B. +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION array of dimension (LDC,*) +C On exit: solutions of triangular systems +C +C LDC - INTEGER +C On entry: leading dimension of C. +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DCSSM memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C LWORK should be set as follows: +C LWORK = (LWORK for DxxxSM) + Pr*M*N + Pl*M*N +C where Pr íPlù = 1 if right íleftù permutation has to +C be performed, 0 otherwise. +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Local Variables +C =============== +C +C LWORKM - INTEGER +C Minimum work area dimension for DCSSM +C +C LWORKB - INTEGER +C Work area dimension for matrices B, C in subroutines DLPUPD +C +C LWORKS - INTEGER +C Work area dimension for subroutine DSWSM +C +C P - INTEGER +C Pointer to work area +C +C LP - LOGICAL +C LP is true if left permutation is required +C +C RP - LOGICAL +C RP is true if right permutation is required +C +C Notes +C ===== +C Some tests have shown that it is more efficient to divide the +C sparse matrix-dense matrix multiplication step and the dense +C matrix permutation and update (C <- xxx + BETA C) step. +C So, the sequence of operations is: +C Right permutation DLPUPD +C Matrix-Matrix product DSWSM +C Left permutation and update DLPUPD +C In order to avoid useless memory transfer, the above scheme is +C simplified according to whether right and left permutation +C have to be performed. +C +C + SUBROUTINE DCSSM(TRANS,M,N,ALPHA,UNITD,D, + + PL,FIDT,DESCRT,T,IT1,IT2,INFOT,PR, + + B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C .. Scalar Arguments .. + IMPLICIT NONE + DOUBLE PRECISION ALPHA, BETA + INTEGER N, LDB, LDC, M, LWORK, IERROR + CHARACTER UNITD, TRANS +C .. Array Arguments .. + DOUBLE PRECISION T(*), B(LDB,*), C(LDC,*), D(*), WORK(*) + INTEGER IT1(*), IT2(*), INFOT(*), PL(*), PR(*) + CHARACTER DESCRT*11, FIDT*5 +C .. Local Scalars .. + INTEGER LWORKM, LWORKB, LWORKS, P + DOUBLE PRECISION ZERO + LOGICAL LP, RP +C .. Local Array.. + INTEGER INT_VAL(5) + DOUBLE PRECISION REAL_VAL(5) + CHARACTER*30 STRINGS(2) + CHARACTER NAME*30 +C .. Parameters .. + PARAMETER (ZERO=0.D0) + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) +C .. External Subroutines .. + EXTERNAL DSWSM, DLPUPD, XERBLA +C .. Intrinsic Functions .. + INTRINSIC DBLE, IDINT +C .. Executable Statements .. +C +C Check for argument errors +C + IERROR = 0 + NAME = 'DCSSM\0' + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (M.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 2 + INT_VAL(2) = M + ELSE IF (N.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 3 + INT_VAL(2) = N + ELSE IF (TRANS.NE.'T' .AND. TRANS.NE.'N' .AND. TRANS.NE.'C') THEN + IERROR = 40 + INT_VAL(1) = 1 + STRINGS(1) = TRANS//'\0' + ELSE IF (UNITD.NE.'U' .AND. UNITD.NE.'L' .AND. UNITD.NE.'R' & + & .AND. UNITD.NE.'B') THEN + IERROR = 40 + INT_VAL(1) = 5 + STRINGS(1) = UNITD//'\0' + ELSE IF (LDB.LT.M) THEN + IERROR = 50 + INT_VAL(1) = 16 + INT_VAL(2) = 2 + INT_VAL(3) = LDB + INT_VAL(4) = M + ELSE IF (LDC.LT.M) THEN + IERROR = 50 + INT_VAL(1) = 19 + INT_VAL(2) = 2 + INT_VAL(3) = LDC + INT_VAL(4) = M + ENDIF + +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + +C +C Inizializations +C + LP = PL(1).NE.0 + RP = PR(1).NE.0 + LWORKB = M*N + LWORKM = 0 + IF (RP) LWORKM = LWORKB + IF (LP) LWORKM = LWORKM + LWORKB + P = LWORKB+1 + IF (LWORK.LT.LWORKM) THEN + IERROR = 60 + INT_VAL(1) = 21 + INT_VAL(2) = LWORKM + INT_VAL(3) = LWORK + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + LWORKS = LWORK - LWORKM + +C Check for M, N +C + IF (M.LE.0 .OR. N.LE.0) THEN + GOTO 9999 + ENDIF +C +C Switching on xP +C + IF (LP .AND. RP) THEN +C +C Both right and left permutations required +C + CALL DLPUPD(M,N,PR,B,LDB,BETA,WORK,M) + CALL DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, + & INFOT,WORK,M,ZERO,WORK(P),M,WORK(P+LWORKB),LWORK,IERROR) + LWORKS = IDINT(WORK(P+LWORKB)) + IF(IERROR .NE. 0) THEN + IF (IERROR.EQ.3010) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + CALL DLPUPD(M,N,PL,WORK(P),M,BETA,C,LDC) + ELSE IF(.NOT.LP .AND. RP) THEN +C +C Only right permutation required +C +c$$$ write(0,*) 'DCSSM: RP' + CALL DLPUPD(M,N,PR,B,LDB,BETA,WORK,M) + CALL DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, + & INFOT,WORK,M,ZERO,C,LDC,WORK(P),LWORK,IERROR) + LWORKS = IDINT(WORK(P)) + IF(IERROR .NE. 0) THEN + NAME = 'DCSSM\0' + IF (IERROR.EQ.3010) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ELSE IF(.NOT.RP .AND. LP) THEN +C +C Only left permutation required +C +c$$$ write(0,*) 'DCSSM: LP' + CALL DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, + & INFOT,B,LDB,BETA,WORK,M,WORK(P),LWORK,IERROR) + LWORKS = IDINT(WORK(P)) + IF(IERROR .NE. 0) THEN + IF (IERROR.EQ.3010) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + CALL DLPUPD(M,N,PL,WORK,M,BETA,C,LDC) + ELSE IF(.NOT.RP .AND. .NOT.LP) THEN +C +C Only triangular systems solver required +C + if (debug) write(*,*) 'DCSSM ',m,n + if (debug) write(*,*) 'DCSSM ',m,n,ierror + CALL DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, + & INFOT,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + LWORKS = IDINT(WORK(1)) + IF(IERROR .NE. 0) THEN + IF (IERROR.EQ.3010) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + ENDIF + ENDIF + +C +C Return minimum workarea dimension +C + LWORKM = LWORKM + LWORKS + WORK(1) = DBLE(LWORKM) + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dcsupd.f b/src/serial/f77/dcsupd.f new file mode 100644 index 00000000..e9fe3443 --- /dev/null +++ b/src/serial/f77/dcsupd.f @@ -0,0 +1,117 @@ + SUBROUTINE DCSUPD(M, N, FIDA, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, FIDH, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER IA, JA, IH, JH, M, N, + + IERROR, FLAG, LIWORK +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),IH1(*),IH2(*), + + INFOA(*),INFOH(*),IWORK(*), + + GLOB_TO_LOC(*) + CHARACTER DESCRA*11,DESCRH*11, FIDA*5, FIDH*5 + DOUBLE PRECISION A(*),H(*) +C .. Local Array.. + integer int_val(5) + double precision real_val(5) + character*20 name, strings(2) + +C .. Executable Statements .. +C +C +C Check parameters +C + IERROR = 0 + NAME = 'DCSUPD\0' + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (M.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 1 + INT_VAL(2) = M + ELSE IF (N.LT.0) THEN + IERROR = 10 + INT_VAL(1) = 2 + INT_VAL(2) = N + ENDIF +C +C Error handling +C + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Check for M, N +C + IF(M.LE.0 .OR. N.LE.0) THEN + GOTO 9999 + ENDIF + +C +C Switching on FIDA +C + IF (FIDA(1:3).EQ.'CSR') THEN + IF (FIDH(1:3).EQ.'CSR') THEN +C +C Submatrix H in CSR format into A matrix in CSR format +C + CALL DCRCRUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) + ELSE IF (FIDH(1:3).EQ.'COO') THEN +C +C Submatrix H in COO format into A matrix in CSR format +C + CALL DCOCRUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) + ELSE + write(*,*) 'FIDA', FIDA(1:3), 'FIDH', FIDH(1:3) + IERROR = 3010 + STRINGS(1) = FIDH(1:3) + NAME = 'DCSUPD\0' + ENDIF + ELSE IF (FIDA(1:3).EQ.'COO') THEN + IF (FIDH(1:3).EQ.'COO')THEN + CALL DCOCRUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) + ENDIF + ELSE IF (FIDA(1:3).EQ.'JAD') THEN + IF (FIDH(1:3).EQ.'COO')THEN + CALL DCOJDUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) + ENDIF +C IERROR = 3010 +C STRINGS(1) = FIDH(1:3) +C NAME = 'DCSUPD\0' + ENDIF + + IF(IERROR.NE.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END + + + diff --git a/src/serial/f77/dgelp.f b/src/serial/f77/dgelp.f new file mode 100644 index 00000000..96de3fb4 --- /dev/null +++ b/src/serial/f77/dgelp.f @@ -0,0 +1,168 @@ +C SUBROUTINE DGELP(TRANS,M,N,P,B,LDB,WORK,LWORK,IERROR) +C +C Purpose +C ======= +C Computing +C B <-- op(P) B +C where op(P) is permutation matrix P or its transpose. +C Notice: when B is a diagonal matrix and it is stored as +C a vector whose entry i is B(i,i), this routine computes +C B <-- P B P(-1) +C where P(-1) is the inverse of P. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whetherif the routine operates with +C matrix P or with the transpose of P as follows: +C TRANS = 'N' -> use matrix P +C TRANS = 'T' OR 'C' -> use the transpose of matrix P +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix B. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix B. +C Unchanged on exit. +C +C P - INTEGER array of dimension (M) +C On entry P specifies the row permutation of matrix B +C (P(1) == 0 if no permutation). +C Unchanged on exit. +C +C B - DOUBLE PRECISION array of dimension (LDB,*) +C On entry: dense matrix. +C On exit: permuted matrix. +C +C LDB - INTEGER +C On entry: leading dimension of B. +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DGELP memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C LWORK should be set as follows: +C LWORK = M +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C + SUBROUTINE DGELP(TRANS,M,N,P,B,LDB,WORK,LWORK,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER LDB, M, N, LWORK, IERROR + CHARACTER TRANS +C .. Array Arguments .. + DOUBLE PRECISION B(LDB,*), WORK(*) + INTEGER P(*) +C .. Local Scalars .. + INTEGER I, J + logical istran,isnotran +C .. Intrinsic Functions .. + INTRINSIC DBLE + logical lsame + external lsame + + character*20 name +c +C .. Executable Statements .. +C +C +C Check on M, N, LDB, LWORK, TRANS +C + NAME = 'DGELP\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + isnotran = lsame(trans,'N') + istran = lsame(trans,'T') .or. lsame(trans,'C') + IF (M.LT.0) THEN + IERROR = 7 + ELSE IF(N.LT.0) THEN + IERROR = 3 + ELSE IF(M.GT.LDB) THEN + IERROR = -6 + ELSE IF (LWORK.LT.M) THEN + IF (LWORK.EQ.0) THEN +C +C Return minimum LWORK +C + IERROR = 8 + WORK(1) = DBLE(M) + GOTO 9998 + ELSE IF(LWORK.NE.0) THEN + IERROR = -8 + ENDIF + ELSE IF (.not.istran.and..not.isnotran) THEN + IERROR = -1 + ENDIF +C +C Error handling +C + IF(IERROR.LT.0) THEN + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Check for M, N, P +C + IF(M.LE.0 .OR. N.LE.0 .OR. P(1).EQ.0) THEN + IERROR=5 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF +C +C Switch on TRANS +C + IF (isnotran) THEN +C +C Permuting with P +C + DO 60 J = 1, N + DO 20 I = 1, M + WORK(I) = B(P(I),J) + 20 CONTINUE + DO 40 I = 1, M + B(I,J) = WORK(I) + 40 CONTINUE + 60 CONTINUE + ELSE IF (istran) THEN +C +C Permuting with the transpose of P. +C + DO 160 J = 1, N + DO 120 I = 1, M + WORK(P(I)) = B(I,J) + 120 CONTINUE + DO 140 I = 1, M + B(I,J) = WORK(I) + 140 CONTINUE + 160 CONTINUE + END IF + + 9998 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dlpupd.f b/src/serial/f77/dlpupd.f new file mode 100644 index 00000000..212de883 --- /dev/null +++ b/src/serial/f77/dlpupd.f @@ -0,0 +1,84 @@ +C Subroutine DLPUPD(M,N,PERM,B,LDB,BETA,C,LDC,IERROR) +C Purpose +C ======= +C +C Computing C <-- PERM B + BETA C +C where PERM is a permutation matrix. +C +C Parameters +C ========== +C +C M - INTEGER +C On entry M specifies the number of rows of matrices +C B and C. +C Unchanged on exit. +C +C N - INTEGER +C On entry N specifies the number of columns of matrices +C B and C. +C Unchanged on exit. +C +C PERM - INTEGER array of dimension (N) +C On entry PERM specifies the values of a permutation matrix. +C Unchanged on exit. +C +C B - DOUBLE PRECISION matrix of dimension (LDB,*) +C On entry: dense matrix. +C Unchanged on exit. +C +C LDB - INTEGER +C On entry LDB holds the value of the leading dimension of B +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION matrix of dimension (LDC,*) +C On entry: dense matrix. +C On exit is updated as shown above. +C +C LDC - INTEGER +C On entry LDC holds the value of the leading dimension of C +C Unchanged on exit. +C +C Note +C ==== +C All checks on argument are performed in the calling routines. +C +C + SUBROUTINE DLPUPD(M,N,PERM,B,LDB,BETA,C,LDC) +C .. Scalar Arguments .. + INTEGER M, N, LDB, LDC + DOUBLE PRECISION BETA +C .. Array Arguments .. + INTEGER PERM(*) + DOUBLE PRECISION B(LDB,*), C(LDC,*) +C .. Local Scalars .. + INTEGER I,J +C +C .. Executable Statements .. +C +C Switching on BETA +C + IF (BETA.NE.0.D0) THEN +C +C Performing left permutation and update +C + DO 40 J = 1, N + DO 30 I = 1, M + C(I,J) = B(PERM(I),J) + BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + ELSE IF(BETA.EQ.0.D0) THEN +C +C Performing right or left permutation +C + DO 160 J = 1, N + DO 150 I = 1, M + C(I,J) = B(PERM(I),J) + 150 CONTINUE + 160 CONTINUE + ENDIF + RETURN + END diff --git a/src/serial/f77/dswmm.f b/src/serial/f77/dswmm.f new file mode 100644 index 00000000..e7ad1a5f --- /dev/null +++ b/src/serial/f77/dswmm.f @@ -0,0 +1,179 @@ +C SUBROUTINE DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2, +C INFOA,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C Purpose +C ======= +C +C Computing C <-- ALPHA A B + BETA C or +C C <-- ALPHA At B + BETA C +C Called by DCSMM +C Actual computing performed by sparse Toolkit kernels. +C This routine selects the proper kernel for each +C data structure. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies if the routine operates with matrix A +C or with the transpose of A as follows: +C TRANS = 'N' -> use matrix A +C TRANS = 'T' or 'C' -> use A' (transpose of matrix A) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A (A') and +C number of rows of matrix C +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix B +C and number of columns of matrix C. +C Unchanged on exit. +C +C K - INTEGER +C On entry: number of columns of matrix A (A') and +C number of rows of matrix B +C Unchanged on exit. +C +C ALPHA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C FIDA - CHARACTER*5 +C On entry FIDA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRA - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRA describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of length 10. +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C B - DOUBLE PRECISION matrix of dimension (LDB,*) +C On entry: dense matrix. +C Unchanged on exit. +C +C LDB - INTEGER +C On entry: leading dimension of B +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION matrix of dimension (LDC,*) +C On entry: dense matrix. +C On exit is updated with the matrix-matrix product. +C +C LDC - INTEGER +C On entry: leading dimension of C +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSWMM memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Note +C ==== +C All checks on argument are performed in the calling routine. +C +C + SUBROUTINE DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2, & + & INFOA,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C .. Scalar Arguments .. + INTEGER M,N,K,LDB,LDC,LWORK,IERROR + CHARACTER TRANS + DOUBLE PRECISION ALPHA,BETA +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),INFOA(*), INT_VAL(5) + CHARACTER DESCRA*11, FIDA*5 + DOUBLE PRECISION A(*),B(LDB,*),C(LDC,*),WORK(*) +C .. External Subroutines .. + EXTERNAL DCSRMM + + CHARACTER*20 NAME + +C .. Executable Statements .. +C + + NAME = 'DSWMM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + +C Switching on FIDA: proper sparse BLAS routine is selected +C according to data structure +C + IF (FIDA(1:3).EQ.'CSR') THEN +C +C A, IA1, IA2 ---> AR, JA, IA +C VAL, INDX, PNTR +C INFOA(*) not used + + CALL DCSRMM(TRANS,M,N,K,ALPHA,DESCRA,A,IA1, + + IA2,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + + ELSE IF (FIDA(1:3).EQ.'JAD') THEN +C +C INFOA(*) not used +C + CALL DJADMM(TRANS,M,N,K,ALPHA,DESCRA,A,IA1, + + IA2,B,LDB,BETA,C,LDC,WORK,IERROR) + + ELSE IF (FIDA(1:3).EQ.'COO') THEN +C +C + CALL DCOOMM(TRANS,M,N,K,ALPHA,DESCRA,A,IA1, + + IA2,INFOA,B,LDB,BETA,C,LDC,WORK) + ELSE +C +C This data structure not yet considered +C + IERROR = 3010 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dswprt.f b/src/serial/f77/dswprt.f new file mode 100644 index 00000000..e88279d5 --- /dev/null +++ b/src/serial/f77/dswprt.f @@ -0,0 +1,181 @@ +C SUBROUTINE DSWMM(TRANS,M,N,K,ALPHA,FIDA,DESCRA,A,IA1,IA2, +C INFOA,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C Purpose +C ======= +C +C Computing C <-- ALPHA A B + BETA C or +C C <-- ALPHA At B + BETA C +C Called by DCSMM +C Actual computing performed by sparse Toolkit kernels. +C This routine selects the proper kernel for each +C data structure. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies if the routine operates with matrix A +C or with the transpose of A as follows: +C TRANS = 'N' -> use matrix A +C TRANS = 'T' or 'C' -> use A' (transpose of matrix A) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows of matrix A (A') and +C number of rows of matrix C +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrix B +C and number of columns of matrix C. +C Unchanged on exit. +C +C K - INTEGER +C On entry: number of columns of matrix A (A') and +C number of rows of matrix B +C Unchanged on exit. +C +C ALPHA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C FIDA - CHARACTER*5 +C On entry FIDA defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRA - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRA describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C A - DOUBLE PRECISION array of DIMENSION (*) +C On entry A specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IA1 - INTEGER array of dimension (*) +C On entry IA1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IA2 - INTEGER array of dimension (*) +C On entry IA2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOA - INTEGER array of length 10. +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C B - DOUBLE PRECISION matrix of dimension (LDB,*) +C On entry: dense matrix. +C Unchanged on exit. +C +C LDB - INTEGER +C On entry: leading dimension of B +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION matrix of dimension (LDC,*) +C On entry: dense matrix. +C On exit is updated with the matrix-matrix product. +C +C LDC - INTEGER +C On entry: leading dimension of C +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSWMM memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Note +C ==== +C All checks on argument are performed in the calling routine. +C +C + SUBROUTINE DSWPRT(M,N,FIDA,DESCRA,A,IA1,IA2,INFOA,TITLE, + + IOUT,IERROR) +C .. Scalar Arguments .. + integer m,n,iout,ierror +c .. array arguments .. + integer ia1(*),ia2(*),infoa(*) + character descra*11, fida*5, title*(*) + double precision a(*) + integer png,pia,pja + + CHARACTER*20 NAME + +C .. Executable Statements .. + + NAME = 'DSWPRT\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + +C +C Switching on FIDA: proper sparse BLAS routine is selected +C according to data structure +C + IF (FIDA(1:3).EQ.'CSR') THEN +C +C A, IA1, IA2 ---> AR, JA, IA +C VAL, INDX, PNTR +C INFOA(*) not used + + CALL DCSRPRT(M,N,DESCRA,A,IA1,IA2,TITLE,IOUT) + ELSE IF (FIDA(1:3).EQ.'COO') THEN +C +C A, IA1, IA2 ---> AR, JA, IA +C VAL, INDX, PNTR +C INFOA(*) not used + CALL DCOOPRT(M,N,DESCRA,A,IA1,IA2,INFOA,TITLE,IOUT) + ELSE IF (FIDA(1:3).EQ.'JAD') THEN +C +C A, IA1, IA2 ---> AR, JA, IA +C VAL, INDX, PNTR +C INFOA(*) not used + PNG = IA2(1) + PIA = IA2(2) + PJA = IA2(3) + + CALL DJADPRT(M,N,IA2(PNG),A,IA1,IA2(PJA),IA2(PIA), + + TITLE,IOUT) + ELSE + + +C +C This data structure not yet considered +C + IERROR = 3010 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/dswsm.f b/src/serial/f77/dswsm.f new file mode 100644 index 00000000..13a6901c --- /dev/null +++ b/src/serial/f77/dswsm.f @@ -0,0 +1,205 @@ +C SUBROUTINE DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, +C INFOT,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C +C Purpose +C ======= +C +C Solving triangular systems of equations with multiple right-hand sides +C C <-- ALPHA D T-1 B + BETA C or +C C <-- ALPHA D T-t B + BETA C or +C C <-- ALPHA T-1 D B + BETA C or +C C <-- ALPHA T-t D B + BETA C +C Actual computing performed by sparse Toolkit kernels. +C This routine selects the proper kernel for each +C data structure. +C +C Parameters +C ========== +C +C TRANS - CHARACTER*1 +C On entry TRANS specifies whether the routine operates with +C matrix T or with the transpose of T as follows: +C TRANS = 'N' -> use matrix T +C TRANS = 'T' or 'C' -> use T' (transpose of matrix T) +C Unchanged on exit. +C +C M - INTEGER +C On entry: number of rows and columns of matrix T +C and number of rows of matrices B and C. +C Unchanged on exit. +C +C N - INTEGER +C On entry: number of columns of matrices B and C +C (number of right-hand sides). +C Unchanged on exit. +C +C ALPHA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C UNITD - CHARACTER*1 +C On entry UNITD specifies whether the diagonal matrix is unit +C or whether row or column scaling has to be performed, as follows: +C UNITD = 'U' -> unit matrix (no scaling) +C UNITD = 'L' -> scale on the left (row scaling) +C UNITD = 'R' -> scale on the right (column scaling) +C UNITD = 'B' -> scale on the right and on the left +C with D^1/2 +C Unchanged on exit. +C +C D - DOUBLE PRECISION array of dimension (M) +C On entry D specifies the main diagonal of the matrix used +C for scaling. +C Unchanged on exit. +C +C FIDT - CHARACTER*5 +C On entry FIDT defines the format of the input sparse matrix. +C Unchanged on exit. +C +C DESCRT - CHARACTER*1 array of DIMENSION (9) +C On entry DESCRT describes the characteristics of the input +C sparse matrix. +C Unchanged on exit. +C +C +C T - DOUBLE PRECISION array of DIMENSION (*) +C On entry T specifies the values of the input sparse +C matrix. +C Unchanged on exit. +C +C IT1 - INTEGER array of dimension (*) +C On entry IT1 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C IT2 - INTEGER array of dimension (*) +C On entry IT2 holds integer information on input sparse +C matrix. Actual information will depend on data format used. +C Unchanged on exit. +C +C INFOT - INTEGER array of dimension (10) +C On entry can hold auxiliary information on input matrices +C formats or environment of subsequent calls. +C Might be changed on exit. +C +C B - DOUBLE PRECISION array of dimension (LDB,*) +C On entry: matrix of right-hand sides +C Unchanged on exit. +C +C LDB - INTEGER +C On entry: leading dimension of B. +C Unchanged on exit. +C +C BETA - DOUBLE PRECISION +C On entry: multiplicative constant. +C Unchanged on exit. +C +C C - DOUBLE PRECISION array of dimension (LDC,*) +C On exit: solutions of triangular systems +C +C LDC - INTEGER +C On entry: leading dimension of C. +C Unchanged on exit. +C +C WORK - DOUBLE PRECISION array of dimension (LWORK) +C On entry: work area. +C On exit INT(WORK(1)) contains the minimum value +C for LWORK satisfying DSWSM memory requirements. +C +C LWORK - INTEGER +C On entry LWORK specifies the dimension of WORK +C Unchanged on exit. +C +C IERROR - INTEGER +C On exit IERROR contains the value of error flag as follows: +C IERROR = 0 no error +C IERROR > 0 warning +C IERROR < 0 fatal error +C +C Note +C ==== +C All checks on argument are performed in the calling routine. +C +C + SUBROUTINE DSWSM(TRANS,M,N,ALPHA,UNITD,D,FIDT,DESCRT,T,IT1,IT2, + & INFOT,B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) +C .. Scalar Arguments .. + INTEGER M, N, LDB, LDC, LWORK, IERROR + CHARACTER UNITD, TRANS + DOUBLE PRECISION ALPHA, BETA +C .. Array Arguments .. + INTEGER IT1(*), IT2(*), INFOT(*) + CHARACTER DESCRT*11, FIDT*5 + DOUBLE PRECISION T(*), B(LDB,*), C(LDC,*), D(*), WORK(*) +C .. Local Scalars .. + INTEGER ONE +C .. Parameters .. + PARAMETER (ONE=1) +C .. External Subroutines .. + EXTERNAL DCSRSM, DCOPY + LOGICAL DEBUG + PARAMETER (DEBUG=.FALSE.) + + CHARACTER*20 NAME + +C .. Executable Statements .. + + NAME = 'DSWSM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + +C +C Check for identity matrix +C + IF(DESCRT(1:1).EQ.'D' .AND. DESCRT(3:3).EQ.'U') THEN + CALL DCOPY(M,B,ONE,C,ONE) + GOTO 9998 + ENDIF + + if (debug) write(*,*) 'DSWSM ',m,n,ierror,' ',unitd +C +C Switching on FIDT: proper sparse BLAS routine is selected +C according to data structure +C + IF (FIDT(1:3).EQ.'CSR') THEN +C +C T, IT1, IT2 ---> AR, JA, IA +C VAL, INDX, PNTR +C INFOT(*) not used +C + CALL DCSRSM(TRANS,M,N,UNITD,D,ALPHA,DESCRT,T,IT1, + & IT2,B,LDB,BETA,C,LDC,WORK,LWORK) + ELSE IF (FIDT(1:3).EQ.'JAD') THEN + + CALL DJADSM(TRANS,M,N,D,UNITD,0,ALPHA,DESCRT,T,IT1,IT2, + + 0,B,LDB,BETA,C,LDC,WORK) + + ELSE IF (FIDT(1:3).EQ.'COO') THEN + + CALL DCOOSM(TRANS,M,N,UNITD,D,ALPHA,DESCRT,T,IT1,IT2,INFOT, + + B,LDB,BETA,C,LDC,WORK,LWORK,IERROR) + + ELSE +C +C This data structure not yet considered +C + IERROR = 3010 + CALL FCPSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + + END IF + + 9998 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/f77/smmp.f b/src/serial/f77/smmp.f new file mode 100644 index 00000000..e31cac61 --- /dev/null +++ b/src/serial/f77/smmp.f @@ -0,0 +1,416 @@ +c======================================================================= +c Sparse Matrix Multiplication Package +c +c Randolph E. Bank and Craig C. Douglas +c +c na.bank@na-net.ornl.gov and na.cdouglas@na-net.ornl.gov +c +c Compile this with the following command (or a similar one): +c +c f77 -c -O smmp.f +c +c======================================================================= + subroutine symbmm + * (n, m, l, + * ia, ja, diaga, + * ib, jb, diagb, + * ic, jc, diagc, + * index) + use realloc +c + integer ia(*), ja(*), diaga, + * ib(*), jb(*), diagb, + * diagc, + * index(*) + integer, pointer :: ic(:),jc(:) + integer :: nze, info +c +c symbolic matrix multiply c=a*b +c +c$$$ write(0,*) 'SYMBMM: ',n,m,l,ib(m+1)-1,jb(ib(m+1)-1) + if (size(ic) < n+1) then + call psrealloc(n+1,ic,info) + endif + maxlmn = max(l,m,n) + do 10 i=1,maxlmn + index(i)=0 + 10 continue + if (diagc.eq.0) then + ic(1)=1 + else + ic(1)=n+2 + endif + minlm = min(l,m) + minmn = min(m,n) +c +c main loop +c + do 50 i=1,n +c$$$ write(0,*) 'SYMBMM: 1 loop ',i,n,ia(i),ia(i+1) + istart=-1 + length=0 +c +c merge row lists +c + do 30 jj=ia(i),ia(i+1) +c a = d + ... + if (jj.eq.ia(i+1)) then + if (diaga.eq.0 .or. i.gt.minmn) goto 30 + j = i + else + j=ja(jj) + endif +c b = d + ... + if (index(j).eq.0 .and. diagb.eq.1 .and. j.le.minlm)then + index(j)=istart + istart=j + length=length+1 + endif + if ((j<1).or.(j>m)) then + write(0,*) ' SymbMM: Problem with A ',i,jj,j,m + endif + do 20 k=ib(j),ib(j+1)-1 + if ((jb(k)<1).or.(jb(k)>maxlmn)) then + write(0,*) 'Problem in SYMBMM 1:',j,k,jb(k),maxlmn + else + if(index(jb(k)).eq.0) then + index(jb(k))=istart + istart=jb(k) + length=length+1 + endif + endif + 20 continue + 30 continue +c +c row i of jc +c + if (diagc.eq.1 .and. index(i).ne.0) length = length - 1 + ic(i+1)=ic(i)+length + + if (ic(i+1) > size(jc)) then + if (n > (2*i)) then + nze = max(ic(i+1), ic(i)*((n+i-1)/i)) + else + nze = max(ic(i+1), nint((dble(ic(i))*(dble(n)/i))) ) + endif + call psrealloc(nze,jc,info) + end if + do 40 j= ic(i),ic(i+1)-1 + if (diagc.eq.1 .and. istart.eq.i) then + istart = index(istart) + index(i) = 0 + endif + jc(j)=istart + istart=index(istart) + index(jc(j))=0 + 40 continue + call isr(length,jc(ic(i))) + index(i) = 0 + 50 continue +c$$$ write(0,*) 'SYMBMM: on exit',ic(n+1)-1,jc(ic(n+1)-1) + return + end + subroutine numbmm(n, m, l, + * ia, ja, diaga, a, + * ib, jb, diagb, b, + * ic, jc, diagc, c, + * temp) +c + integer ia(*), ja(*), diaga, + * ib(*), jb(*), diagb, + * ic(*), jc(*), diagc +c + real(kind(1.d0)) a(*), b(*), c(*), temp(*) +c +c numeric matrix multiply c=a*b +c + maxlmn = max(l,m,n) + do 10 i = 1,maxlmn + temp(i) = 0. + 10 continue + minlm = min(l,m) + minln = min(l,n) + minmn = min(m,n) +c +c c = a*b +c + do 50 i = 1,n + do 30 jj = ia(i),ia(i+1) +c a = d + ... + if (jj.eq.ia(i+1)) then + if (diaga.eq.0 .or. i.gt.minmn) goto 30 + j = i + ajj = a(i) + else + j=ja(jj) + ajj = a(jj) + endif +c b = d + ... + if (diagb.eq.1 .and. j.le.minlm) + * temp(j) = temp(j) + ajj * b(j) + if ((j<1).or.(j>m)) then + write(0,*) ' NUMBMM: Problem with A ',i,jj,j,m + endif + + do 20 k = ib(j),ib(j+1)-1 + if((jb(k)<1).or. (jb(k) > maxlmn)) then + write(0,*) ' NUMBMM: jb problem',j,k,jb(k),maxlmn + else + temp(jb(k)) = temp(jb(k)) + ajj * b(k) + endif + 20 continue + 30 continue +c c = d + ... + if (diagc.eq.1 .and. i.le.minln) then + c(i) = temp(i) + temp(i) = 0. + endif +c$$$ if (mod(i,100)==1) +c$$$ + write(0,*) ' NUMBMM: Fixing row ',i,ic(i),ic(i+1)-1 + do 40 j = ic(i),ic(i+1)-1 + if((jc(j)<1).or. (jc(j) > maxlmn)) then + write(0,*) ' NUMBMM: output problem',i,j,jc(j),maxlmn + else + c(j) = temp(jc(j)) + temp(jc(j)) = 0. + endif + 40 continue + 50 continue + return + end + subroutine transp + * (n, m, + * ia, ja, diaga, a, + * ib, jb, b, + * move) +c + integer ia(*), ja(*), diaga, + * ib(*), jb(*), + * move +c + real(kind(1.d0)) a(*), b(*) +c +c compute b = a(transpose) +c +c first make ib +c + do i=1,m+1 + ib(i)=0 + enddo + if (move.eq.1) then + do i =1,m+1 + b(i) = 0. + enddo + endif + if (diaga.eq.1) then + ib(1)=m + 2 + else + ib(1)=1 + endif +c +c count indices for each column +c + do 30 i=1,n + do 20 j=ia(i),ia(i+1)-1 + ib(ja(j)+1)=ib(ja(j)+1)+1 + 20 continue + 30 continue + do i=1,m + ib(i+1)=ib(i)+ib(i+1) + enddo +c +c now make jb +c + do 60 i=1,n + do 50 j=ia(i),ia(i+1)-1 + index=ja(j) + jb(ib(index))=i + if (move.eq.1) b(ib(index)) = a(j) + ib(index)=ib(index)+1 + 50 continue + 60 continue +c +c now fixup ib +c + do i=m,2,-1 + ib(i)=ib(i-1) + end do + if (diaga.eq.1) then + if (move.eq.1) then + j = min(n,m) + do i = 1,j + b(i) = a(i) + enddo + endif + ib(1)=m + 2 + else + ib(1)=1 + endif + return + end + subroutine ytobs + * (n, + * ia, ja, diaga, syma, a, + * ib, jb, b, + * move) +c + integer ia(*), ja(*), diaga, syma, + * ib(*), jb(*), move +c + real(kind(1.d0)) a(*), b(*) +c +c create the bank-smith data structures b from the +c corresponding yale data structures a +c + do 10 i=1,n + 10 ib(i+1)=ia(i+1)-ia(i) +c +c look for upper triangular entries and duplicate entries +c + do 50 i=1,n + do 40 jj=ia(i),ia(i+1)-1 + j=ja(jj) + if (i.eq.j) then + ib(i+1)=ib(i+1)-1 + ja(jj) = -j + endif + if(j.gt.i) then + ib(i+1)=ib(i+1)-1 + ib(j+1)=ib(j+1)+1 +c +c check for duplicates +c + do 20 k=ia(j),ia(j+1)-1 + if(ja(k).eq.i) then + ib(j+1)=ib(j+1)-1 + ja(jj)=-j + go to 30 + endif + 20 continue + 30 continue + endif + 40 continue + 50 continue +c +c compute ib +c + ib(1)=n + 2 + do 60 i=1,n + 60 ib(i+1)=ib(i+1)+ib(i) +c +c initialize b if move = 1 +c + if (move.eq.1) then + lshift = 0 + if (syma.eq.0) lshift = ib(n+1) - ib(1) + do 62 ii = 1,ib(n+1)+lshift-1 + 62 b(ii) = 0. + if (diaga.eq.1) then + do 64 ii = 1,n + 64 b(ii) = a(ii) + endif + endif +c +c compute jb +c + do 80 i=1,n + do 70 jj=ia(i),ia(i+1)-1 + j=ja(jj) + if(j.gt.i) then + jb(ib(j))=i + if (move.eq.1) b(ib(j)) = a(jj) + ib(j)=ib(j)+1 + else + if(j.le.0) then + ja(jj)=-j + if (move.eq.1 .and. i.eq.-j) b(i) = a(jj) + else + jb(ib(i))=j + if (move.eq.1) b(ib(i)+lshift) = a(jj) + ib(i)=ib(i)+1 + endif + endif + 70 continue + 80 continue +c +c fixup ib +c + do 90 i=n,2,-1 + 90 ib(i)=ib(i-1) + ib(1)=n + 2 + return + end + subroutine bstoy + * (n, + * ia, ja, syma, a, + * ib, jb, diagb, b, + * move) +c + integer ia(*), ja(*), syma, + * ib(*), jb(*), diagb, + * move +c + real(kind(1.d0)) a(*), b(*) +c +c create the yale data structures b from the +c corresponding bank-smith data structures a +c +c compute ib +c + if (diagb.eq.1) then + ib(1) = n + 2 + icor = 0 + if (move.eq.1) then + lshift = 0 + if (syma.eq.0) lshift = ia(n+1) - ia(1) + do 2 i = 1,n + 2 b(i) = a(i) + endif + else + ib(1) = 1 + icor = 1 + endif + do 10 i=1,n + 10 ib(i+1)=ia(i+1)-ia(i)+icor + do 30 i=1,n + do 20 j=ia(i),ia(i+1)-1 + ib(ja(j)+1)=ib(ja(j)+1)+1 + 20 continue + 30 continue +c + do 40 i=1,n + 40 ib(i+1)=ib(i+1)+ib(i) + if (diagb.eq.0) then + do 45 i = 1,n + jb(ib(i)) = i + if (move.eq.1) b(ib(i)) = a(i) + 45 ib(i) = ib(i) + 1 + endif +c +c now compute jb +c + do 60 i=1,n + do 50 jj=ia(i),ia(i+1)-1 + j = ja(jj) + jb(ib(j))=i + jb(ib(i))=j + if (move.eq.1) then + b(ib(j)) = a(jj) + b(ib(i)) = a(jj+lshift) + endif + ib(i)=ib(i)+1 + ib(j)=ib(j)+1 + 50 continue + 60 continue +c +c fixup ib +c + do 70 i=n,2,-1 + 70 ib(i)=ib(i-1) + if (diagb.eq.1) then + ib(1)=n+2 + else + ib(1)=1 + endif + return + end diff --git a/src/serial/imsr.f90 b/src/serial/imsr.f90 new file mode 100644 index 00000000..e114f57c --- /dev/null +++ b/src/serial/imsr.f90 @@ -0,0 +1,53 @@ +! File: imsr.f90 + ! Subroutine: + ! Parameters:subroutine imsr(n,x) + integer :: n + integer :: x(n) + + integer, allocatable :: iaux(:) + + integer :: iswap, iret, info, lp, k + integer :: lswap + + if (n<0) then + write(0,*) 'Error: IMSR: N<0' + return + endif + + if (n<=1) return + + allocate(iaux(0:n+1),stat=info) + if (info/=0) then + write(0,*) 'IMSR: memory allocation failed',info + return + endif + + + call mrgsrt(n,x,iaux,iret) + + if (iret == 0) then + lp = iaux(0) + k = 1 + do + if ((lp==0).or.(k>n)) exit + do + if (lp >= k) exit + lp = iaux(lp) + end do + iswap = x(lp) + x(lp) = x(k) + x(k) = iswap + lswap = iaux(lp) + iaux(lp) = iaux(k) + iaux(k) = lswap + lp = lswap + k = k + 1 + enddo + end if + + deallocate(iaux,stat=info) + if (info/=0) then + write(0,*) 'IMSR: memory deallocation failed',info + endif + return +end subroutine imsr diff --git a/src/serial/imsrx.f90 b/src/serial/imsrx.f90 new file mode 100644 index 00000000..8052c111 --- /dev/null +++ b/src/serial/imsrx.f90 @@ -0,0 +1,64 @@ +! File: imsrx.f90 + ! Subroutine: + ! Parameters:subroutine imsrx(n,x,indx) + integer :: n + integer :: x(n) + integer :: indx(n) + + integer, allocatable :: iaux(:) + + integer :: iswap, iret, info, lp, k + integer :: lswap, ixswap + + if (n<0) then + write(0,*) 'Error: IMSRX: N<0' + return + endif + + if (n==0) return + if (n==1) then + indx(1) = 1 + return + endif + + allocate(iaux(0:n+1),stat=info) + if (info/=0) then + write(0,*) 'IMSRX: memory allocation failed',info + return + endif + + do k=1,n + indx(k) = k + enddo + + call mrgsrt(n,x,iaux,iret) + + if (iret /= 1) then + lp = iaux(0) + k = 1 + do + if ((lp==0).or.(k>n)) exit + do + if (lp >= k) exit + lp = iaux(lp) + end do + iswap = x(lp) + x(lp) = x(k) + x(k) = iswap + ixswap = indx(lp) + indx(lp) = indx(k) + indx(k) = ixswap + lswap = iaux(lp) + iaux(lp) = iaux(k) + iaux(k) = lswap + lp = lswap + k = k + 1 + enddo + end if + + deallocate(iaux,stat=info) + if (info/=0) then + write(0,*) 'IMSRX: memory deallocation failed',info + endif + return +end subroutine imsrx diff --git a/src/serial/jad/Makefile b/src/serial/jad/Makefile new file mode 100644 index 00000000..1defe97e --- /dev/null +++ b/src/serial/jad/Makefile @@ -0,0 +1,38 @@ +include ../../../../Make.inc +# +# The object files +# + +FOBJS = dcojdupd.o djadmm.o djadmv.o djadsm.o djadsv.o djdnrmi.o djadnr.o djadprt.o\ + djadmv2.o djadmv3.o djadmv4.o + +OBJS=$(FOBJS) + +# +# Where the library should go, and how it is called. +# Note that we are regenerating most of libsparker.a on the fly. +#LIBDIR=../../LIB +#LIBNAME=libsparker.a +LIBFILE=$(LIBDIR)/$(LIBNAME) +SPARKERDIR=.. +INCDIRS=-I. -I$(SPARKERDIR) -I$(LIBDIR) + +# +# No change should be needed below +# + +default: lib + +lib: $(OBJS) + $(AR) $(LIBFILE) $(OBJS) + $(RANLIB) $(LIBFILE) + +$(FOBJS): $(SPARKERDIR)/sparker.fh + +clean: cleanobjs + +veryclean: cleanobjs + +cleanobjs: + /bin/rm -f $(OBJS) + diff --git a/src/serial/jad/dcojdupd.f b/src/serial/jad/dcojdupd.f new file mode 100644 index 00000000..6d007bc3 --- /dev/null +++ b/src/serial/jad/dcojdupd.f @@ -0,0 +1,48 @@ + SUBROUTINE DCOJDUPD(M, N, DESCRA, A, IA1, + + IA2, INFOA, IA, JA, DESCRH, H, IH1, IH2, + + INFOH, IH, JH, FLAG, GLOB_TO_LOC, + + IWORK, LIWORK, IERROR) +C +C .. Matrix A to be updated is required to be stored with +C .. column indices belonging to the same row ordered. +C .. Block H to be inserted don't need to be stored in such a way. +C +C Flag = 0: put elements to 0.0D0; +C Flag = 1: replace elements with new value; +C Flag = 2: sum block value to elements; +C + IMPLICIT NONE + include 'sparker.fh' +C .. Scalar Arguments .. + INTEGER IA, JA, IH, JH, M, N, + + IERROR, FLAG, LIWORK +C .. Array Arguments .. + INTEGER IA1(*),IA2(*),IH1(*),IH2(*), + + INFOA(*),INFOH(*),IWORK(*), + + GLOB_TO_LOC(*) + CHARACTER DESCRA*11,DESCRH*11 + DOUBLE PRECISION A(*),H(*) +C .. Local scalars .. + INTEGER J, NNZ, IP1, NNZI +C .. Local arrays .. + IERROR = 0 + IF (IBITS(INFOA(UPD_),2,1).EQ.1) THEN +C +C Smart update capability +C + IP1 = INFOA(UPD_PNT_) + NNZ = IA1(IP1+NNZ_) + NNZI = INFOH(1) + DO J = 1, NNZI + NNZ = NNZ + 1 + A(NNZ) = H(J) + ENDDO + IA1(IP1+NNZ_) = NNZ + ELSE + IERROR = 2 + ENDIF + 9999 CONTINUE + RETURN + END + + diff --git a/src/serial/jad/djadmm.f b/src/serial/jad/djadmm.f new file mode 100644 index 00000000..363e5e28 --- /dev/null +++ b/src/serial/jad/djadmm.f @@ -0,0 +1,113 @@ + SUBROUTINE DJADMM(TRANSA,M,K,N,ALPHA,DESCRA,AR, + * JA,IA,B,LDB,BETA,C,LDC,WORK,IERROR) + + IMPLICIT NONE +C +C +C .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER K, LDB, LDC, M, N,IERROR + CHARACTER TRANSA +C .. Array Arguments .. + DOUBLE PRECISION AR(*), B(LDB,*), C(LDC,*), WORK(*) + INTEGER IA(*), JA(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + integer PIA, PJA, PNG + INTEGER I, J, KB,NB, ERR_ACT + CHARACTER DIAG, TRANS +c .. Local Arrays .. + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C .. Executable Statements .. +C + + NAME = 'DJADMM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF (DESCRA(1:1).EQ.'G') TRANS = TRANSA + IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'U') TRANS = 'U' + IF (DESCRA(1:1).EQ.'S' .AND. DESCRA(2:2).EQ.'L') TRANS = 'L' + IF (DESCRA(1:1).EQ.'D') THEN + IF (DESCRA(3:3).EQ.'U') THEN + DO 40 I = 1, K + DO 20 J = 1, M + C(J,I) = BETA*C(J,I) + ALPHA*B(J,I) + 20 CONTINUE + 40 CONTINUE + ELSE + DO 80 I = 1, K + DO 60 J = 1, M + C(J,I) = BETA*C(J,I) + ALPHA*AR(J)*B(J,I) + 60 CONTINUE + 80 CONTINUE + END IF + RETURN + END IF + IF (TRANS.EQ.'T'.or.TRANS.EQ.'C') THEN + IERROR = 3015 + RETURN + ENDIF +C + IF (DESCRA(3:3).EQ.'N') DIAG = 'N' + IF (DESCRA(3:3).EQ.'U') DIAG = 'U' + PNG = IA(1) + PIA = IA(2) + PJA = IA(3) +* write(6,*)'M N K',m,n,k + + NB = 4 + KB = MOD(K,NB) + + SELECT CASE(KB) + CASE (1) + IF (K==1) THEN + CALL DJADMV(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,1),BETA,C(1,1),IERROR) + ELSE + CALL DJADMV2(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,1),LDB,BETA,C(1,1),LDC,IERROR) + CALL DJADMV3(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,3),LDB,BETA,C(1,3),LDC,IERROR) + KB = KB + 4 + ENDIF + CASE(2) + CALL DJADMV2(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,1),LDB,BETA,C(1,1),LDC,IERROR) + CASE(3) + CALL DJADMV3(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,1),LDB,BETA,C(1,1),LDC,IERROR) + + END SELECT + + IF(IERROR.NE.0) THEN + INT_VAL(1)=IERROR + CALL FCPSB_ERRPUSH(4012,NAME,INT_VAL) + GOTO 9999 + END IF + + DO I=KB+1,K,NB + CALL DJADMV4(DIAG,M,N,ALPHA,IA(PNG), + + AR,JA,IA(PJA),IA(PIA),B(1,I),LDB,BETA,C(1,I),LDC,IERROR) + END DO + + IF(IERROR.NE.0) THEN + INT_VAL(1)=IERROR + CALL FCPSB_ERRPUSH(4012,NAME,INT_VAL) + GOTO 9999 + END IF + + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/jad/djadmv.f b/src/serial/jad/djadmv.f new file mode 100644 index 00000000..983e54e5 --- /dev/null +++ b/src/serial/jad/djadmv.f @@ -0,0 +1,322 @@ +*********************************************************************** +* PROCEDURAL LOGIC SECTION * +* SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X,BETA,Y) * +* DOUBLE PRECISION ZERO * +* PARAMETER (ZERO=0.0D0) * +* DOUBLE PRECISION ACC * +* INTEGER I, J, K, IPX, IPG * +* LOGICAL UNI * +*C .. Executable Statements .. * +*C * +*C * +* IF (DIAG.EQ.'U') THEN * +* DO 10 I = 1, M * +* Y(I) = BETA*Y(I) + ALPHA*X(I) * +* 10 CONTINUE * +* ELSE * +* DO 20 I = 1, M * +* Y(I) = BETA*Y(I) * +* 20 CONTINUE * +* END IF * +* * +* IF (ALPHA.EQ.ZERO) THEN * +* RETURN * +* END IF * +*C * +*C * +*C DO 200 IPG = 1, NG * +* DO 50 K = IA(2,IPG), IA(3,IPG)-1 * +* IPX = IA(1,IPG) * +* DO 40 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* IPX = IPX + 1 * +* 40 CONTINUE * +* 50 CONTINUE * +*C * +*C * +* IPX = IA(1,IPG) * +* DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 * +* DO 60 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* 60 CONTINUE * +* IPX = IPX + 1 * +* 70 CONTINUE * +* 200 CONTINUE * +*C * +* RETURN * +*C * +*C * +* END * +* * +* * +*********************************************************************** + SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X, + + BETA,Y,IERROR) + IMPLICIT NONE + DOUBLE PRECISION A(*),X(*),Y(*),ALPHA,BETA,ZERO + INTEGER IA(3,*),KA(*),JA(*),NCOL,NROW,NG,IERROR + CHARACTER DIAG + PARAMETER (ZERO=0.0D0) + INTEGER I, K, IPX, IPG, I0, IN + INTEGER NPG + DOUBLE PRECISION Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7, + + Y8, Y9, Y10, Y11, Y12, Y13, Y14, Y15 +c .. Executable Statements .. +c +c + IERROR=0 + IF (DIAG.EQ.'U') THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I) = ALPHA*X(I) + ENDDO + ELSE + DO 10 I = 1, NROW + Y(I) = BETA*Y(I) + ALPHA*X(I) + 10 CONTINUE + ENDIF + ELSE + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I) = 0.D0 + ENDDO + ELSE + DO 20 I = 1, NROW + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ENDIF + + IF (ALPHA.EQ.ZERO) THEN + RETURN + END IF +c +c + DO 200 IPG = 1, NG + K = IA(2,IPG) + NPG = JA(K+1)-JA(K) + +c$$$ WRITE(0,*)NPG + + IF (NPG.EQ.4) THEN + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+3, 4 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + + ELSE IF (NPG.EQ.5) THEN + + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + Y4 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+4, 5 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + Y4 = Y4 + A(I+4)*X(KA(I+4)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + Y(IPX+4) = Y(IPX+4) + ALPHA*Y4 + + ELSE IF (NPG.EQ.6) THEN + + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + Y4 = ZERO + Y5 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+5, 6 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + Y4 = Y4 + A(I+4)*X(KA(I+4)) + Y5 = Y5 + A(I+5)*X(KA(I+5)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + Y(IPX+4) = Y(IPX+4) + ALPHA*Y4 + Y(IPX+5) = Y(IPX+5) + ALPHA*Y5 + + ELSE IF (NPG.EQ.7) THEN + + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + Y4 = ZERO + Y5 = ZERO + Y6 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+6, 7 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + Y4 = Y4 + A(I+4)*X(KA(I+4)) + Y5 = Y5 + A(I+5)*X(KA(I+5)) + Y6 = Y6 + A(I+6)*X(KA(I+6)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + Y(IPX+4) = Y(IPX+4) + ALPHA*Y4 + Y(IPX+5) = Y(IPX+5) + ALPHA*Y5 + Y(IPX+6) = Y(IPX+6) + ALPHA*Y6 + + ELSE IF (NPG.EQ.8) THEN + + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + Y4 = ZERO + Y5 = ZERO + Y6 = ZERO + Y7 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+7, 8 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + Y4 = Y4 + A(I+4)*X(KA(I+4)) + Y5 = Y5 + A(I+5)*X(KA(I+5)) + Y6 = Y6 + A(I+6)*X(KA(I+6)) + Y7 = Y7 + A(I+7)*X(KA(I+7)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + Y(IPX+4) = Y(IPX+4) + ALPHA*Y4 + Y(IPX+5) = Y(IPX+5) + ALPHA*Y5 + Y(IPX+6) = Y(IPX+6) + ALPHA*Y6 + Y(IPX+7) = Y(IPX+7) + ALPHA*Y7 + + ELSE IF (NPG.EQ.16) THEN + + IPX = IA(1,IPG) + Y0 = ZERO + Y1 = ZERO + Y2 = ZERO + Y3 = ZERO + Y4 = ZERO + Y5 = ZERO + Y6 = ZERO + Y7 = ZERO + Y8 = ZERO + Y9 = ZERO + Y10 = ZERO + Y11 = ZERO + Y12 = ZERO + Y13 = ZERO + Y14 = ZERO + Y15 = ZERO + K = IA(2,IPG) + I0 = JA(K) + K = IA(3,IPG)-1 + IN = JA(K) + DO I = I0, IN+15, 16 + Y0 = Y0 + A(I+0)*X(KA(I+0)) + Y1 = Y1 + A(I+1)*X(KA(I+1)) + Y2 = Y2 + A(I+2)*X(KA(I+2)) + Y3 = Y3 + A(I+3)*X(KA(I+3)) + Y4 = Y4 + A(I+4)*X(KA(I+4)) + Y5 = Y5 + A(I+5)*X(KA(I+5)) + Y6 = Y6 + A(I+6)*X(KA(I+6)) + Y7 = Y7 + A(I+7)*X(KA(I+7)) + Y8 = Y8 + A(I+8)*X(KA(I+8)) + Y9 = Y9 + A(I+9)*X(KA(I+9)) + Y10 = Y10 + A(I+10)*X(KA(I+10)) + Y11 = Y11 + A(I+11)*X(KA(I+11)) + Y12 = Y12 + A(I+12)*X(KA(I+12)) + Y13 = Y13 + A(I+13)*X(KA(I+13)) + Y14 = Y14 + A(I+14)*X(KA(I+14)) + Y15 = Y15 + A(I+15)*X(KA(I+15)) + ENDDO + Y(IPX+0) = Y(IPX+0) + ALPHA*Y0 + Y(IPX+1) = Y(IPX+1) + ALPHA*Y1 + Y(IPX+2) = Y(IPX+2) + ALPHA*Y2 + Y(IPX+3) = Y(IPX+3) + ALPHA*Y3 + Y(IPX+4) = Y(IPX+4) + ALPHA*Y4 + Y(IPX+5) = Y(IPX+5) + ALPHA*Y5 + Y(IPX+6) = Y(IPX+6) + ALPHA*Y6 + Y(IPX+7) = Y(IPX+7) + ALPHA*Y7 + Y(IPX+8) = Y(IPX+8) + ALPHA*Y8 + Y(IPX+9) = Y(IPX+9) + ALPHA*Y9 + Y(IPX+10) = Y(IPX+10) + ALPHA*Y10 + Y(IPX+11) = Y(IPX+11) + ALPHA*Y11 + Y(IPX+12) = Y(IPX+12) + ALPHA*Y12 + Y(IPX+13) = Y(IPX+13) + ALPHA*Y13 + Y(IPX+14) = Y(IPX+14) + ALPHA*Y14 + Y(IPX+15) = Y(IPX+15) + ALPHA*Y15 + + ELSE + + DO K = IA(2,IPG), IA(3,IPG)-1 + IPX = IA(1,IPG) + DO I = JA(K), JA(K+1) - 1 + Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) + IPX = IPX + 1 + ENDDO + ENDDO + END IF + +c CSR Product + + IPX = IA(1,IPG) + DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 + DO 60 I = JA(K), JA(K+1) - 1 + Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) + 60 CONTINUE + IPX = IPX + 1 + 70 CONTINUE + 200 CONTINUE +c + RETURN + END + diff --git a/src/serial/jad/djadmv2.f b/src/serial/jad/djadmv2.f new file mode 100644 index 00000000..f2359be6 --- /dev/null +++ b/src/serial/jad/djadmv2.f @@ -0,0 +1,328 @@ +*********************************************************************** +* PROCEDURAL LOGIC SECTION * +* SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X,BETA,Y) * +* DOUBLE PRECISION ZERO * +* PARAMETER (ZERO=0.0D0) * +* DOUBLE PRECISION ACC * +* INTEGER I, J, K, IPX, IPG * +* LOGICAL UNI * +*C .. Executable Statements .. * +*C * +*C * +* IF (DIAG.EQ.'U') THEN * +* DO 10 I = 1, M * +* Y(I) = BETA*Y(I) + ALPHA*X(I) * +* 10 CONTINUE * +* ELSE * +* DO 20 I = 1, M * +* Y(I) = BETA*Y(I) * +* 20 CONTINUE * +* END IF * +* * +* IF (ALPHA.EQ.ZERO) THEN * +* RETURN * +* END IF * +*C * +*C * +*C DO 200 IPG = 1, NG * +* DO 50 K = IA(2,IPG), IA(3,IPG)-1 * +* IPX = IA(1,IPG) * +* DO 40 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* IPX = IPX + 1 * +* 40 CONTINUE * +* 50 CONTINUE * +*C * +*C * +* IPX = IA(1,IPG) * +* DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 * +* DO 60 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* 60 CONTINUE * +* IPX = IPX + 1 * +* 70 CONTINUE * +* 200 CONTINUE * +*C * +* RETURN * +*C * +*C * +* END * +* * +* * +*********************************************************************** + SUBROUTINE DJADMV2(DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA, + + X,LDX,BETA,Y,LDY, IERROR) + IMPLICIT NONE + INTEGER IA(3,*),KA(*),JA(*),NCOL,NROW,NG,LDX,LDY,IERROR + DOUBLE PRECISION A(*),X(LDX,*),Y(LDY,*),ALPHA,BETA + CHARACTER DIAG + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D0) + INTEGER I, J, K, IPX, IPG, I0, IN + INTEGER NPG + integer nb + parameter (nb=2) + DOUBLE PRECISION Y0(NB), Y1(NB), Y2(NB), Y3(NB), Y4(NB), + + Y5(NB), Y6(NB), Y7(NB), Y8(NB), Y9(NB), Y10(NB), Y11(NB), + + Y12(NB), Y13(NB), Y14(NB), Y15(NB) +c .. Executable Statements .. +c +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta,nb + IERROR=0 + IF (DIAG.EQ.'U') THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = ALPHA*X(I,1:NB) + ENDDO + ELSE + DO 10 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + ALPHA*X(I,1:NB) + 10 CONTINUE + ENDIF + ELSE + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = 0.D0 + ENDDO + ELSE + DO 20 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + 20 CONTINUE + END IF + ENDIF + + IF (ALPHA.EQ.ZERO) THEN + RETURN + END IF +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta + + do 200 ipg = 1, ng + k = ia(2,ipg) + npg = ja(k+1)-ja(k) + +c$$$ write(0,*) 'djadmv2:',npg + + if (npg.eq.4) then + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+3, 4 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + + else if (npg.eq.5) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+4, 5 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + + else if (npg.eq.6) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+5, 6 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + + else if (npg.eq.7) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+6, 7 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + + else if (npg.eq.8) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+7, 8 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + + else if (npg.eq.16) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + y8(1:nb) = zero + y9(1:nb) = zero + y10(1:nb) = zero + y11(1:nb) = zero + y12(1:nb) = zero + y13(1:nb) = zero + y14(1:nb) = zero + y15(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+15, 16 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + y8(1:nb) = y8(1:nb) + a(i+8)*x(ka(i+8),1:nb) + y9(1:nb) = y9(1:nb) + a(i+9)*x(ka(i+9),1:nb) + y10(1:nb) = y10(1:nb) + a(i+10)*x(ka(i+10),1:nb) + y11(1:nb) = y11(1:nb) + a(i+11)*x(ka(i+11),1:nb) + y12(1:nb) = y12(1:nb) + a(i+12)*x(ka(i+12),1:nb) + y13(1:nb) = y13(1:nb) + a(i+13)*x(ka(i+13),1:nb) + y14(1:nb) = y14(1:nb) + a(i+14)*x(ka(i+14),1:nb) + y15(1:nb) = y15(1:nb) + a(i+15)*x(ka(i+15),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + y(ipx+8,1:nb) = y(ipx+8,1:nb) + alpha*y8(1:nb) + y(ipx+9,1:nb) = y(ipx+9,1:nb) + alpha*y9(1:nb) + y(ipx+10,1:nb) = y(ipx+10,1:nb) + alpha*y10(1:nb) + y(ipx+11,1:nb) = y(ipx+11,1:nb) + alpha*y11(1:nb) + y(ipx+12,1:nb) = y(ipx+12,1:nb) + alpha*y12(1:nb) + y(ipx+13,1:nb) = y(ipx+13,1:nb) + alpha*y13(1:nb) + y(ipx+14,1:nb) = y(ipx+14,1:nb) + alpha*y14(1:nb) + y(ipx+15,1:nb) = y(ipx+15,1:nb) + alpha*y15(1:nb) + + else + + do k = ia(2,ipg), ia(3,ipg)-1 + ipx = ia(1,ipg) + do i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + ipx = ipx + 1 + enddo + enddo + end if + +c csr product + + ipx = ia(1,ipg) + do 70 k = ia(3,ipg), ia(2,ipg+1)-1 + do 60 i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + 60 continue + ipx = ipx + 1 + 70 continue + 200 continue +c + return + end + diff --git a/src/serial/jad/djadmv3.f b/src/serial/jad/djadmv3.f new file mode 100644 index 00000000..91e594f5 --- /dev/null +++ b/src/serial/jad/djadmv3.f @@ -0,0 +1,328 @@ +*********************************************************************** +* PROCEDURAL LOGIC SECTION * +* SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X,BETA,Y) * +* DOUBLE PRECISION ZERO * +* PARAMETER (ZERO=0.0D0) * +* DOUBLE PRECISION ACC * +* INTEGER I, J, K, IPX, IPG * +* LOGICAL UNI * +*C .. Executable Statements .. * +*C * +*C * +* IF (DIAG.EQ.'U') THEN * +* DO 10 I = 1, M * +* Y(I) = BETA*Y(I) + ALPHA*X(I) * +* 10 CONTINUE * +* ELSE * +* DO 20 I = 1, M * +* Y(I) = BETA*Y(I) * +* 20 CONTINUE * +* END IF * +* * +* IF (ALPHA.EQ.ZERO) THEN * +* RETURN * +* END IF * +*C * +*C * +*C DO 200 IPG = 1, NG * +* DO 50 K = IA(2,IPG), IA(3,IPG)-1 * +* IPX = IA(1,IPG) * +* DO 40 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* IPX = IPX + 1 * +* 40 CONTINUE * +* 50 CONTINUE * +*C * +*C * +* IPX = IA(1,IPG) * +* DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 * +* DO 60 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* 60 CONTINUE * +* IPX = IPX + 1 * +* 70 CONTINUE * +* 200 CONTINUE * +*C * +* RETURN * +*C * +*C * +* END * +* * +* * +*********************************************************************** + SUBROUTINE DJADMV3(DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA, + + X,LDX,BETA,Y,LDY,IERROR) + IMPLICIT NONE + INTEGER IA(3,*),KA(*),JA(*),NCOL,NROW,NG,LDX,LDY,IERROR + DOUBLE PRECISION A(*),X(LDX,*),Y(LDY,*),ALPHA,BETA + CHARACTER DIAG + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D0) + INTEGER I, K, IPX, IPG, I0, IN + INTEGER NPG + integer nb + parameter (nb=3) + DOUBLE PRECISION Y0(NB), Y1(NB), Y2(NB), Y3(NB), Y4(NB), + + Y5(NB), Y6(NB), Y7(NB), Y8(NB), Y9(NB), Y10(NB), Y11(NB), + + Y12(NB), Y13(NB), Y14(NB), Y15(NB) +c .. Executable Statements .. +c +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta,nb + IERROR=0 + IF (DIAG.EQ.'U') THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = ALPHA*X(I,1:NB) + ENDDO + ELSE + DO 10 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + ALPHA*X(I,1:NB) + 10 CONTINUE + ENDIF + ELSE + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = 0.D0 + ENDDO + ELSE + DO 20 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + 20 CONTINUE + END IF + ENDIF + + IF (ALPHA.EQ.ZERO) THEN + RETURN + END IF +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta + + do 200 ipg = 1, ng + k = ia(2,ipg) + npg = ja(k+1)-ja(k) + +c$$$ write(0,*) 'djadmv2:',npg + + if (npg.eq.4) then + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+3, 4 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + + else if (npg.eq.5) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+4, 5 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + + else if (npg.eq.6) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+5, 6 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + + else if (npg.eq.7) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+6, 7 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + + else if (npg.eq.8) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+7, 8 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + + else if (npg.eq.16) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + y8(1:nb) = zero + y9(1:nb) = zero + y10(1:nb) = zero + y11(1:nb) = zero + y12(1:nb) = zero + y13(1:nb) = zero + y14(1:nb) = zero + y15(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+15, 16 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + y8(1:nb) = y8(1:nb) + a(i+8)*x(ka(i+8),1:nb) + y9(1:nb) = y9(1:nb) + a(i+9)*x(ka(i+9),1:nb) + y10(1:nb) = y10(1:nb) + a(i+10)*x(ka(i+10),1:nb) + y11(1:nb) = y11(1:nb) + a(i+11)*x(ka(i+11),1:nb) + y12(1:nb) = y12(1:nb) + a(i+12)*x(ka(i+12),1:nb) + y13(1:nb) = y13(1:nb) + a(i+13)*x(ka(i+13),1:nb) + y14(1:nb) = y14(1:nb) + a(i+14)*x(ka(i+14),1:nb) + y15(1:nb) = y15(1:nb) + a(i+15)*x(ka(i+15),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + y(ipx+8,1:nb) = y(ipx+8,1:nb) + alpha*y8(1:nb) + y(ipx+9,1:nb) = y(ipx+9,1:nb) + alpha*y9(1:nb) + y(ipx+10,1:nb) = y(ipx+10,1:nb) + alpha*y10(1:nb) + y(ipx+11,1:nb) = y(ipx+11,1:nb) + alpha*y11(1:nb) + y(ipx+12,1:nb) = y(ipx+12,1:nb) + alpha*y12(1:nb) + y(ipx+13,1:nb) = y(ipx+13,1:nb) + alpha*y13(1:nb) + y(ipx+14,1:nb) = y(ipx+14,1:nb) + alpha*y14(1:nb) + y(ipx+15,1:nb) = y(ipx+15,1:nb) + alpha*y15(1:nb) + + else + + do k = ia(2,ipg), ia(3,ipg)-1 + ipx = ia(1,ipg) + do i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + ipx = ipx + 1 + enddo + enddo + end if + +c csr product + + ipx = ia(1,ipg) + do 70 k = ia(3,ipg), ia(2,ipg+1)-1 + do 60 i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + 60 continue + ipx = ipx + 1 + 70 continue + 200 continue +c + return + end + diff --git a/src/serial/jad/djadmv4.f b/src/serial/jad/djadmv4.f new file mode 100644 index 00000000..4f078e67 --- /dev/null +++ b/src/serial/jad/djadmv4.f @@ -0,0 +1,328 @@ +*********************************************************************** +* PROCEDURAL LOGIC SECTION * +* SUBROUTINE DJADMV (DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA,X,BETA,Y) * +* DOUBLE PRECISION ZERO * +* PARAMETER (ZERO=0.0D0) * +* DOUBLE PRECISION ACC * +* INTEGER I, J, K, IPX, IPG * +* LOGICAL UNI * +*C .. Executable Statements .. * +*C * +*C * +* IF (DIAG.EQ.'U') THEN * +* DO 10 I = 1, M * +* Y(I) = BETA*Y(I) + ALPHA*X(I) * +* 10 CONTINUE * +* ELSE * +* DO 20 I = 1, M * +* Y(I) = BETA*Y(I) * +* 20 CONTINUE * +* END IF * +* * +* IF (ALPHA.EQ.ZERO) THEN * +* RETURN * +* END IF * +*C * +*C * +*C DO 200 IPG = 1, NG * +* DO 50 K = IA(2,IPG), IA(3,IPG)-1 * +* IPX = IA(1,IPG) * +* DO 40 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* IPX = IPX + 1 * +* 40 CONTINUE * +* 50 CONTINUE * +*C * +*C * +* IPX = IA(1,IPG) * +* DO 70 K = IA(3,IPG), IA(2,IPG+1)-1 * +* DO 60 I = JA(K), JA(K+1) - 1 * +* Y(IPX) = Y(IPX) + ALPHA*A(I)*X(KA(I)) * +* 60 CONTINUE * +* IPX = IPX + 1 * +* 70 CONTINUE * +* 200 CONTINUE * +*C * +* RETURN * +*C * +*C * +* END * +* * +* * +*********************************************************************** + SUBROUTINE DJADMV4(DIAG,NROW,NCOL,ALPHA,NG,A,KA,JA,IA, + + X,LDX,BETA,Y,LDY,IERROR) + IMPLICIT NONE + INTEGER IA(3,*),KA(*),JA(*),NCOL,NROW,NG,LDX,LDY,IERROR + DOUBLE PRECISION A(*),X(LDX,*),Y(LDY,*),ALPHA,BETA + CHARACTER DIAG + DOUBLE PRECISION ZERO + PARAMETER (ZERO=0.0D0) + INTEGER I, K, IPX, IPG, I0, IN + INTEGER NPG + integer nb + parameter (nb=4) + DOUBLE PRECISION Y0(NB), Y1(NB), Y2(NB), Y3(NB), Y4(NB), + + Y5(NB), Y6(NB), Y7(NB), Y8(NB), Y9(NB), Y10(NB), Y11(NB), + + Y12(NB), Y13(NB), Y14(NB), Y15(NB) +c .. Executable Statements .. +c +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta,nb + IERROR=0 + IF (DIAG.EQ.'U') THEN + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = ALPHA*X(I,1:NB) + ENDDO + ELSE + DO 10 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + ALPHA*X(I,1:NB) + 10 CONTINUE + ENDIF + ELSE + IF (BETA.EQ.ZERO) THEN + DO I = 1, NROW + Y(I,1:NB) = 0.D0 + ENDDO + ELSE + DO 20 I = 1, NROW + Y(I,1:NB) = BETA*Y(I,1:NB) + 20 CONTINUE + END IF + ENDIF + + IF (ALPHA.EQ.ZERO) THEN + RETURN + END IF +c +c$$$ write(0,*) 'djadmv2:',diag,alpha,beta + + do 200 ipg = 1, ng + k = ia(2,ipg) + npg = ja(k+1)-ja(k) + +c$$$ write(0,*) 'djadmv2:',npg + + if (npg.eq.4) then + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+3, 4 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + + else if (npg.eq.5) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+4, 5 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + + else if (npg.eq.6) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+5, 6 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + + else if (npg.eq.7) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+6, 7 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + + else if (npg.eq.8) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+7, 8 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + + else if (npg.eq.16) then + + ipx = ia(1,ipg) + y0(1:nb) = zero + y1(1:nb) = zero + y2(1:nb) = zero + y3(1:nb) = zero + y4(1:nb) = zero + y5(1:nb) = zero + y6(1:nb) = zero + y7(1:nb) = zero + y8(1:nb) = zero + y9(1:nb) = zero + y10(1:nb) = zero + y11(1:nb) = zero + y12(1:nb) = zero + y13(1:nb) = zero + y14(1:nb) = zero + y15(1:nb) = zero + k = ia(2,ipg) + i0 = ja(k) + k = ia(3,ipg)-1 + in = ja(k) + do i = i0, in+15, 16 + y0(1:nb) = y0(1:nb) + a(i+0)*x(ka(i+0),1:nb) + y1(1:nb) = y1(1:nb) + a(i+1)*x(ka(i+1),1:nb) + y2(1:nb) = y2(1:nb) + a(i+2)*x(ka(i+2),1:nb) + y3(1:nb) = y3(1:nb) + a(i+3)*x(ka(i+3),1:nb) + y4(1:nb) = y4(1:nb) + a(i+4)*x(ka(i+4),1:nb) + y5(1:nb) = y5(1:nb) + a(i+5)*x(ka(i+5),1:nb) + y6(1:nb) = y6(1:nb) + a(i+6)*x(ka(i+6),1:nb) + y7(1:nb) = y7(1:nb) + a(i+7)*x(ka(i+7),1:nb) + y8(1:nb) = y8(1:nb) + a(i+8)*x(ka(i+8),1:nb) + y9(1:nb) = y9(1:nb) + a(i+9)*x(ka(i+9),1:nb) + y10(1:nb) = y10(1:nb) + a(i+10)*x(ka(i+10),1:nb) + y11(1:nb) = y11(1:nb) + a(i+11)*x(ka(i+11),1:nb) + y12(1:nb) = y12(1:nb) + a(i+12)*x(ka(i+12),1:nb) + y13(1:nb) = y13(1:nb) + a(i+13)*x(ka(i+13),1:nb) + y14(1:nb) = y14(1:nb) + a(i+14)*x(ka(i+14),1:nb) + y15(1:nb) = y15(1:nb) + a(i+15)*x(ka(i+15),1:nb) + enddo + y(ipx+0,1:nb) = y(ipx+0,1:nb) + alpha*y0(1:nb) + y(ipx+1,1:nb) = y(ipx+1,1:nb) + alpha*y1(1:nb) + y(ipx+2,1:nb) = y(ipx+2,1:nb) + alpha*y2(1:nb) + y(ipx+3,1:nb) = y(ipx+3,1:nb) + alpha*y3(1:nb) + y(ipx+4,1:nb) = y(ipx+4,1:nb) + alpha*y4(1:nb) + y(ipx+5,1:nb) = y(ipx+5,1:nb) + alpha*y5(1:nb) + y(ipx+6,1:nb) = y(ipx+6,1:nb) + alpha*y6(1:nb) + y(ipx+7,1:nb) = y(ipx+7,1:nb) + alpha*y7(1:nb) + y(ipx+8,1:nb) = y(ipx+8,1:nb) + alpha*y8(1:nb) + y(ipx+9,1:nb) = y(ipx+9,1:nb) + alpha*y9(1:nb) + y(ipx+10,1:nb) = y(ipx+10,1:nb) + alpha*y10(1:nb) + y(ipx+11,1:nb) = y(ipx+11,1:nb) + alpha*y11(1:nb) + y(ipx+12,1:nb) = y(ipx+12,1:nb) + alpha*y12(1:nb) + y(ipx+13,1:nb) = y(ipx+13,1:nb) + alpha*y13(1:nb) + y(ipx+14,1:nb) = y(ipx+14,1:nb) + alpha*y14(1:nb) + y(ipx+15,1:nb) = y(ipx+15,1:nb) + alpha*y15(1:nb) + + else + + do k = ia(2,ipg), ia(3,ipg)-1 + ipx = ia(1,ipg) + do i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + ipx = ipx + 1 + enddo + enddo + end if + +c csr product + + ipx = ia(1,ipg) + do 70 k = ia(3,ipg), ia(2,ipg+1)-1 + do 60 i = ja(k), ja(k+1) - 1 + y(ipx,1:nb) = y(ipx,1:nb) + alpha*a(i)*x(ka(i),1:nb) + 60 continue + ipx = ipx + 1 + 70 continue + 200 continue +c + return + end + diff --git a/src/serial/jad/djadnr.f b/src/serial/jad/djadnr.f new file mode 100644 index 00000000..3fb5238c --- /dev/null +++ b/src/serial/jad/djadnr.f @@ -0,0 +1,57 @@ +C ... Compute infinity norma for sparse matrix in CSR Format ... + DOUBLE PRECISION FUNCTION DJADNR(TRANS,M,N,NG,A,KA,JA,IA, + + INFOA,IERROR) + IMPLICIT NONE + INCLUDE 'sparker.fh' +C .. Scalar Arguments .. + INTEGER M,N, IERROR, NG + CHARACTER TRANS +C .. Array Arguments .. + INTEGER KA(*),JA(*),IA(3,*),INFOA(*) + DOUBLE PRECISION A(*) +C ... Local Array .. + DOUBLE PRECISION NRMI_BLOCK(MAXJDROWS) +C ... Local Scalars .. + DOUBLE PRECISION NRMI + INTEGER I, K, IPG, NPG, IPX + + IERROR=0 + NRMI = 0.0 + + DO IPG = 1, NG + K = IA(2,IPG) + NPG = JA(K+1)- JA(K) + +C ... Initialize NRMI_BLOCK ... + DO I = 1, NPG + NRMI_BLOCK(I) = 0.0 + ENDDO + + DO K = IA(2,IPG), IA(3,IPG)-1 + IPX = 1 + DO I = JA(K), JA(K+1) - 1 + NRMI_BLOCK(IPX) = NRMI_BLOCK(IPX) + ABS(A(I)) + IPX = IPX + 1 + ENDDO + ENDDO + +C ... CSR Representation ... + + IPX = 1 + DO K = IA(3,IPG), IA(2,IPG+1)-1 + DO I = JA(K), JA(K+1) - 1 + NRMI_BLOCK(IPX) = NRMI_BLOCK(IPX) + ABS(A(I)) + ENDDO + IPX = IPX + 1 + ENDDO + +C ... Compute Max in Block ... + DO I = 1, NPG + IF (NRMI_BLOCK(I).GT.NRMI) THEN + NRMI = NRMI_BLOCK(I) + ENDIF + ENDDO + ENDDO + + DJADNR = NRMI + END diff --git a/src/serial/jad/djadprt.f b/src/serial/jad/djadprt.f new file mode 100644 index 00000000..36629b5b --- /dev/null +++ b/src/serial/jad/djadprt.f @@ -0,0 +1,63 @@ +c +c What if a wrong DESCRA is passed? +c WARNING: THIS CANNOT POSSIBLY WORK CORRECTLY BECAUSE +c IT DOES NOT ACCOUNT FOR ROW PERMUTATION. +* +* + SUBROUTINE DJADPRT(NROW,NCOL,NG,A,KA,JA,IA,TITLE,IOUT) +C +C +C .. Scalar Arguments .. + INTEGER IOUT +C .. Array Arguments .. + DOUBLE PRECISION A(*) + INTEGER IA(3,*), JA(*), KA(*) + CHARACTER DESCRA*11, TITLE*(*) +C .. Local Scalars .. + INTEGER I, K + + +C .. External Subroutines .. +C +C + + nnzero = ja(ia(2,ng+1)-1+1)-1 + + write(iout,fmt=998) + + write(iout,fmt=992) + write(iout,fmt=996) + write(iout,fmt=996) title + write(iout,fmt=995) 'Number of rows: ',nrow + write(iout,fmt=995) 'Number of columns: ',ncol + write(iout,fmt=995) 'Nonzero entries: ',nnzero + write(iout,fmt=996) + write(iout,fmt=992) + write(iout,*) nrow,ncol,nnzero + 998 format('%%MatrixMarket matrix coordinate real general') + 997 format('%%MatrixMarket matrix coordinate real symmetric') + 992 format('%======================================== ') + 996 format('% ',a) + 995 format('% ',a,i9,a,i9,a,i9) + 994 format(i6,1x,i6,1x,e16.8) + + do ipg=1, ng + do k = ia(2,ipg), ia(3,ipg)-1 + ipx = ia(1,ipg) + do i = ja(k), ja(k+1) - 1 + write(iout,994) ipx,ka(i),a(i) + ipx = ipx + 1 + enddo + enddo + + ipx = ia(1,ipg) + do k = ia(3,ipg), ia(2,ipg+1)-1 + do i = ja(k), ja(k+1) - 1 + write(iout,994) ipx,ka(i),a(i) + enddo + ipx = ipx + 1 + enddo + enddo + + return + end diff --git a/src/serial/jad/djadsm.f b/src/serial/jad/djadsm.f new file mode 100644 index 00000000..6f7a05e5 --- /dev/null +++ b/src/serial/jad/djadsm.f @@ -0,0 +1,104 @@ + SUBROUTINE DJADSM(TRANST,M,N,VDIAG,TDIAG,PERMQ,ALPHA,DESCRA, + + AR,JA,IA,PERMP,B,LDB,BETA,C,LDC,WORK) +C +C +C .. Scalar Arguments .. + INTEGER LDB, LDC, M, N + CHARACTER TDIAG, TRANST + DOUBLE PRECISION ALPHA, BETA +C .. Array Arguments .. + DOUBLE PRECISION AR(*), B(LDB,*), C(LDC,*), VDIAG(*), WORK(*) + INTEGER IA(*), JA(*), PERMP(*), PERMQ(*) + CHARACTER DESCRA*11 +C .. Local Scalars .. + INTEGER PIA, PJA, PNG + INTEGER I, K, ERR_ACT + CHARACTER UPLO,UNITD + logical debug + parameter (debug=.false.) + CHARACTER*20 NAME + INTEGER INT_VAL(5) +C .. Executable Statements .. +C + NAME = 'DJADSM\0' + IERROR = 0 + CALL FCPSB_ERRACTIONSAVE(ERR_ACT) + + IF((ALPHA.NE.1.D0) .OR. (BETA.NE.0.D0))then + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + ENDIF + UPLO = '?' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'U') UPLO = 'U' + IF (DESCRA(1:1).EQ.'T' .AND. DESCRA(2:2).EQ.'L') UPLO = 'L' +C + IF (UPLO.EQ.'?') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + + IF (DESCRA(3:3).NE.'U') THEN + IERROR=5 + CALL PSB_ERRPUSH(IERROR,NAME,INT_VAL) + GOTO 9999 + END IF + UNITD=DESCRA(3:3) +C +C B = INV(A)*B OR B=INV(A')*B +C + if (debug) write(0,*) 'DJADSM : ',m,n,' ',tdiag + + IF (TDIAG.EQ.'R') THEN + if (debug) write(0,*) 'DJADSM : Right Scale',m,n + DO I = 1, N + DO K = 1, M + B(K,I) = B(K,I)*VDIAG(K) + ENDDO + ENDDO + END IF + + PNG = IA(1) + PIA = IA(2) + PJA = IA(3) + + DO I = 1, N + CALL DJADSV(UNITD,M,IA(PNG), + + AR,JA,IA(PIA),IA(PJA),B(1,I),C(1,I),IERROR) + ENDDO + IF(IERROR.NE.0) THEN + INT_VAL(1)=IERROR + CALL FCPSB_ERRPUSH(4012,NAME,INT_VAL) + GOTO 9999 + END IF + + + if (debug) then + write(0,*) 'Check from DJADSM' + do k=1,m + write(0,*) k, b(k,1),c(k,1) + enddo + endif + + IF (TDIAG.EQ.'L') THEN + DO I = 1, N + DO K = 1, M + C(K,I) = C(K,I)*VDIAG(K) + ENDDO + ENDDO + END IF +c write(*,*) 'exit djadsm' + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + RETURN + + 9999 CONTINUE + CALL FCPSB_ERRACTIONRESTORE(ERR_ACT) + + IF ( ERR_ACT .NE. 0 ) THEN + CALL FCPSB_SERROR() + RETURN + ENDIF + + RETURN + END diff --git a/src/serial/jad/djadsv.f b/src/serial/jad/djadsv.f new file mode 100644 index 00000000..f337c610 --- /dev/null +++ b/src/serial/jad/djadsv.f @@ -0,0 +1,55 @@ +C This routine compute only the casew with Unit diagonal ... + SUBROUTINE DJADSV(UNITD,NROW,NG,A,KA,IA,JA,X,Y,IERROR) + IMPLICIT REAL*8 (A-H,O-Z) + DIMENSION IA(3,*),JA(*),KA(*),A(*),X(*),Y(*) + CHARACTER UNITD + INTEGER IERROR + + IERROR=0 + + IF (UNITD.EQ.'U') THEN + + IF (NG.EQ.0) THEN + DO I = 1, NROW + Y(I) = X(I) + ENDDO + ENDIF + + DO IPG=1,NG + DO I = IA(1,IPG),IA(1,IPG+1)-1 + Y(I) = X(I) + END DO +* +* LOOP ON COLUMNS +* --------------- +* + IP2 = IA(2,IPG) + DO K = IP2, IA(3,IPG)-1 + IPX = IA(1,IPG) + DO I = JA(K), JA(K+1)-1 + Y(IPX) = Y(IPX) - A(I)*Y(KA(I)) + IPX = IPX+1 + ENDDO + ENDDO +* +* +* LOOP ON ROWS +* --------------- +* + IPX = IA(1,IPG) + DO K = IA(3,IPG), IA(2,IPG+1)-1 + DO I = JA(K), JA(K+1)-1 + Y(IPX) = Y(IPX) - A(I)*Y(KA(I)) + ENDDO + IPX = IPX + 1 + ENDDO + +**************************************** + END DO !END LOOP ON IPG=1,NG +**************************************** + ELSE + WRITE(0,*) 'ERROR in DJADSV' + ENDIF + RETURN + END + diff --git a/src/serial/jad/djdnrmi.f b/src/serial/jad/djdnrmi.f new file mode 100644 index 00000000..54d874ed --- /dev/null +++ b/src/serial/jad/djdnrmi.f @@ -0,0 +1,28 @@ +C ... Compute infinity norm for sparse matrix in CSR Format ... + DOUBLE PRECISION FUNCTION DJDNRMI(TRANS,M,N,DESCRA,A,JA,IA, + + INFOA,IERROR) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER M,N, IERROR + CHARACTER TRANS +C .. Array Arguments .. + INTEGER JA(*),IA(*),INFOA(*) + CHARACTER DESCRA*11 + DOUBLE PRECISION A(*) +C .. Local scalars .. + INTEGER PNG, PIA, PJA +C .. External routines .. + DOUBLE PRECISION DJADNR + EXTERNAL DJADNR + + IERROR=0 + PNG = IA(1) + PIA = IA(2) + PJA = IA(3) + + IF (DESCRA(1:1).EQ.'G') THEN + DJDNRMI = DJADNR(TRANS,M,N,IA(PNG), + + A,JA,IA(PJA),IA(PIA), + + INFOA,IERROR) + ENDIF + END diff --git a/src/serial/psb_cest.f90 b/src/serial/psb_cest.f90 new file mode 100644 index 00000000..f9a9bd04 --- /dev/null +++ b/src/serial/psb_cest.f90 @@ -0,0 +1,82 @@ +subroutine psb_cest(afmt, nnz, lia1, lia2, lar, up, info) + + use psb_error_mod + implicit none + + ! .. scalar arguments .. + integer :: nnz, lia1, lia2, lar, info + character :: up + ! .. array arguments.. + character(len=5) :: afmt + integer :: int_val(5), err_act + character(len=20) :: name + + name = 'cest' + call psb_erractionsave(err_act) + + if (afmt.eq.'???') then + afmt = fidef + endif + + if (up.eq.'y') then + if (afmt.eq.'JAD') then + lia1 = 2*(nnz + nnz/5) +1000 + lia2 = 2*(nnz + nnz/5) +1000 + lar = nnz + nnz/5 + else if (afmt.eq.'COO') then + lia1 = nnz + lia2 = 2*nnz + 1000 + lar = nnz + else if(afmt.eq.'CSR') then + lia1 = nnz + lia2 = 2*nnz + 1000 + lar = nnz + else + info = 3012 + call psb_errpush(info,name) + goto 9999 + endif + + else if (up.eq.'n') then + + if (afmt.eq.'jad') then + lia1 = nnz + nnz/5 + lia2 = nnz + nnz/5 + lar = nnz + nnz/5 + else if (afmt.eq.'coo') then + lia1 = nnz + lia2 = nnz + lar = nnz + else if(afmt.eq.'csr') then + lia1 = nnz + lia2 = nnz + lar = nnz + else + info = 3012 + call psb_errpush(info,name) + goto 9999 + endif + + else + + info = 3012 + call psb_errpush(info,name,int_val) + goto 9999 + + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if ( err_act .ne. 0 ) then + call psb_error() + return + endif + + return + +end subroutine psb_cest + diff --git a/src/serial/psb_dcsdp.f90 b/src/serial/psb_dcsdp.f90 new file mode 100644 index 00000000..c869b078 --- /dev/null +++ b/src/serial/psb_dcsdp.f90 @@ -0,0 +1,387 @@ +! File: psb_dcsdp.f90 +! +! Subroutine: psb_dcsdp +! This subroutine performs the assembly of +! the local part of a sparse distributed matrix +! +! Parameters: +! a - type(). The input matrix to be assembled. +! b - type(). The assembled output matrix. +! info - integer. Eventually returns an error code. +! ifc - integer(optional). ??? +! check - character(optional). ??? +! trans - character(optional). ??? +! unitd - character(optional). ??? +! +subroutine psb_dcsdp(a, b,info,ifc,check,trans,unitd) + use psb_const_mod + use psb_error_mod + use psb_spmat_type + + implicit none + !....Parameters... + Type(psb_dspmat_type), intent(in) :: A + Type(psb_dspmat_type), intent(inout) :: B + Integer, intent(out) :: info + Integer, intent(in), optional :: ifc + character, intent(in), optional :: check,trans,unitd + + !...Locals... + real(kind(1.d0)) :: d(1) + real(kind(1.d0)), allocatable :: work(:) + type(psb_dspmat_type) :: temp_a + Integer :: nzr, ntry, ifc_,ierror, ia1_size,& + & ia2_size, aspk_size + integer :: ip1, ip2, nnz, iflag, ichk, nnzt,& + & ipc, i, count, err_act, ierrv(5) + character :: check_,trans_,unitd_ + Integer, Parameter :: maxtry=8 + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + name='psb_dcsdp' + info = 0 + call psb_erractionsave(err_act) + + ntry=0 + if (present(ifc)) then + ifc_ = max(1,ifc) + else + ifc_ = 1 + endif + if (present(check)) then + check_ = check + else + check_ = 'N' + endif + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + endif + if (present(unitd)) then + unitd_ = unitd + else + unitd_ = 'U' + endif + + if (check_=='R') then + allocate(work(max(size(a%aspk),size(b%aspk))+1000),stat=info) + else + allocate(work(max(size(a%ia1),size(b%ia1),& + & size(a%ia2),size(b%ia2))+max(a%m,b%m)+1000),stat=info) + endif + if (info /= 0) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + if (ifc_<1) then + write(0,*) 'dcsdp90 Error: invalid ifc ',ifc_ + info = -4 + call psb_errpush(info,name) + goto 9999 + endif + + if((check_=='Y').or.(check_=='C')) then + if(a%fida(1:3)=='CSR') then + call dcsrck(trans,a%m,a%k,a%descra,a%aspk,a%ia1,a%ia2,work,size(work),info) + if(info /= 0) then + info=4010 + ch_err='dcsrck' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + else + write(0,'("Check not yet suported for the ",a3," storage format")')a%fida(1:3) + end if + + end if + + if (check_/='R') then + ! ...matrix conversion... + b%m=a%m + b%k=a%k + select case (a%fida(1:3)) + + case ('CSR') + + select case (b%fida(1:3)) + + case ('CSR') + + + ia1_size=a%infoa(nnz_) + ia2_size=a%m+1 + aspk_size=a%infoa(nnz_) + call psb_spreall(b,ia1_size,ia2_size,aspk_size,info) + + call dcrcr(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia1, a%ia2, a%infoa, b%pl, b%descra, b%aspk, b%ia1,& + & b%ia2, b%infoa, b%pr, size(b%aspk), size(b%ia1),& + & size(b%ia2), work, size(work), info) + + + if (info/=0) then + info=4010 + ch_err='dcrcr' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + case ('JAD') + + !...converting to JAD + !...output matrix may not be big enough + do + + call dcrjd(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia1, a%ia2, a%infoa, b%pl, b%descra, b%aspk, b%ia1,& + & b%ia2, b%infoa, b%pr, size(b%aspk), size(b%ia1),& + & size(b%ia2), work, size(work), nzr, info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='dcrjd') + goto 9999 + endif + + ntry = ntry + 1 + if (debug) then + write(0,*) 'On out from dcrjad ',nzr,info + end if + if (nzr == 0) exit + if (ntry > maxtry ) then + write(0,*) 'Tried reallocating for DCRJAD for ',maxtry,': giving up now.' + info=2040 + call psb_errpush(info,name) + goto 9999 + endif + + call psb_spreall(b,nzr,info,ifc=ifc_) + if (info /= 0) then + info=2040 + call psb_errpush(info,name) + goto 9999 + endif + + end do + + if (info/=0) then + call psb_errpush(info,name) + goto 9999 + end if + + case ('COO') + + aspk_size=max(size(a%aspk),a%ia2(a%m+1)) + call psb_spreall(b,aspk_size,info) +!!$ write(0,*) 'From DCSDP90:',b%fida,size(b%aspk),info + call dcrco(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia1, a%ia2, a%infoa, b%pl, b%descra, b%aspk, b%ia1,& + & b%ia2, b%infoa, b%pr, size(b%aspk), size(b%ia1),& + & size(b%ia2), work, size(work), info) + + if (info/=0) then + call psb_errpush(4010,name,a_err='dcrco') + goto 9999 + end if + + end select + + case ('COO','COI') + + select case (b%fida(1:3)) + + case ('CSR') + + call dcocr(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia2, a%ia1, a%infoa, b%pl, b%descra, b%aspk, b%ia1,& + & b%ia2, b%infoa, b%pr, size(b%aspk), size(b%ia1),& + & size(b%ia2), work, 2*size(work), info) + + if (info/=0) then + call psb_errpush(4010,name,a_err='dcocr') + goto 9999 + end if + + case ('JAD') + + call psb_spall(temp_a, size(b%ia1),size(b%ia2),size(b%aspk),info) + if (info /= 0) then + info=2040 + call psb_errpush(info,name) + goto 9999 + endif + temp_a%m = a%m + temp_a%k = a%k + + !...Dirty trick: converting to CSR and then to JAD + + call dcocr(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia2, a%ia1, a%infoa, temp_a%pl, temp_a%descra, & + & temp_a%aspk, temp_a%ia1, temp_a%ia2, temp_a%infoa, temp_a%pr, & + & size(temp_a%aspk), size(temp_a%ia1),& + & size(temp_a%ia2), work, 2*size(work), info) + + if (info/=0) then + call psb_errpush(4010,name,a_err='dcocr') + goto 9999 + end if + + do + call dcrjd(trans_, temp_a%m, temp_a%k, unitd_, d, temp_a%descra, & + & temp_a%aspk, temp_a%ia1, temp_a%ia2, temp_a%infoa, & + & b%pl, b%descra, b%aspk, b%ia1, b%ia2, b%infoa, b%pr, & + & size(b%aspk), size(b%ia1),& + & size(b%ia2), work, size(work), nzr, info) + if (info/=0) then + call psb_errpush(4010,name,a_err='dcrjd') + goto 9999 + end if + + ntry = ntry + 1 + if (debug) then + write(0,*) 'On out from dcrjad ',nzr,info + end if + if (nzr == 0) exit + if (ntry > maxtry ) then + write(0,*) 'Tried reallocating for DCRJAD for ',maxtry,& + & ': giving up now.' + info=2040 + call psb_errpush(info,name) + goto 9999 + endif + + call psb_spreall(b,nzr,info,ifc=ifc_) + if (info /= 0) then + info=2040 + call psb_errpush(info,name) + goto 9999 + endif + + end do + + + + case ('COO') + + call dcoco(trans_, a%m, a%k, unitd_, d, a%descra, a%aspk,& + & a%ia2, a%ia1, a%infoa, b%pl, b%descra, b%aspk, b%ia1,& + & b%ia2, b%infoa, b%pr, size(b%aspk), size(b%ia1),& + & size(b%ia2), work, 2*size(work), info) + if (info/=0) then + call psb_errpush(4010,name,a_err='dcoco') + goto 9999 + end if + + end select + + end select + + else if (check_=='R') then + !...Regenerating matrix + if (b%infoa(state_) /= spmat_upd) then + info = 8888 + call psb_errpush(info,name) + goto 9999 + endif + if (ibits(b%infoa(upd_),2,1).eq.0) then + ! + ! Nothing to be done...... + ! + info = 8888 + call psb_errpush(info,name) + goto 9999 + endif + + + if (b%fida(1:3)/='JAD') then + ip1 = b%infoa(upd_pnt_) + ip2 = b%ia2(ip1+ip2_) + nnz = b%ia2(ip1+nnz_) + iflag = b%ia2(ip1+iflag_) + ichk = b%ia2(ip1+ichk_) + nnzt = b%ia2(ip1+nnzt_) + if (debug) write(*,*) 'Regeneration start: ',& + & b%infoa(upd_),perm_update,nnz,nnzt ,iflag,info + + if ((ichk/=nnzt+iflag).or.(nnz/=nnzt)) then + info = 8889 + write(*,*) 'Regeneration start error: ',& + & b%infoa(upd_),perm_update,nnz,nnzt ,iflag,ichk + call psb_errpush(info,name) + goto 9999 + endif + do i= 1, nnz + work(i) = 0.d0 + enddo + if (iflag.eq.2) then + do i=1, nnz + work(b%ia2(ip2+i-1)) = b%aspk(i) + enddo + else if (iflag.eq.3) then + do i=1, nnz + work(b%ia2(ip2+i-1)) = b%aspk(i) + work(b%ia2(ip2+i-1)) + enddo + endif + do i=1, nnz + b%aspk(i) = work(i) + enddo + + else if (b%fida(1:3) == 'JAD') then + + ip1 = b%infoa(upd_pnt_) + ip2 = b%ia1(ip1+ip2_) + count = b%ia1(ip1+zero_) + ipc = b%ia1(ip1+ipc_) + nnz = b%ia1(ip1+nnz_) + iflag = b%ia1(ip1+iflag_) + ichk = b%ia1(ip1+ichk_) + nnzt = b%ia1(ip1+nnzt_) + if (debug) write(*,*) 'Regeneration start: ',& + & b%infoa(upd_),perm_update,nnz,nnzt,count, & + & iflag,info + + if ((ichk/=nnzt+iflag).or.(nnz/=nnzt)) then + info = 10 + write(*,*) 'Regeneration start error: ',& + & b%infoa(upd_),perm_update,nnz,nnzt ,iflag,ichk + call psb_errpush(info,name) + goto 9999 + endif + + do i= 1, nnz+count + work(i) = 0.d0 + enddo + if (iflag.eq.2) then + do i=1, nnz + work(b%ia1(ip2+i-1)) = b%aspk(i) + enddo + else if (iflag.eq.3) then + do i=1, nnz + work(b%ia1(ip2+i-1)) = b%aspk(i) + work(b%ia1(ip2+i-1)) + enddo + endif + do i=1, nnz+count + b%aspk(i) = work(i) + enddo + do i=1, count + b%aspk(b%ia1(ipc+i-1)) = 0.d0 + end do + endif + + + end if + b%infoa(state_) = spmat_asb + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dcsdp diff --git a/src/serial/psb_dcsmm.f90 b/src/serial/psb_dcsmm.f90 new file mode 100644 index 00000000..bec58a8a --- /dev/null +++ b/src/serial/psb_dcsmm.f90 @@ -0,0 +1,58 @@ +! File: psb_dcsmm.f90 +! Subroutine: +! Parameters: +subroutine psb_dcsmm(alpha,a,b,beta,c,info,trans) + use psb_spmat_type + use psb_error_mod + implicit none + + type(psb_dspmat_type) :: a + real(kind(1.d0)) :: alpha, beta, b(:,:), c(:,:) + integer :: info + character, optional :: trans + + real(kind(1.d0)), allocatable :: work(:) + character :: trans_ + integer :: iwsz,m,n,k,lb,lc,err_act + character(len=20) :: name, ch_err + + name='psb_dcsmm' + info = 0 + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + endif + + if (trans_=='N') then + m = a%m + k = a%k + else + k = a%m + m = a%k + end if + n = min(size(b,2),size(c,2)) + lb = size(b,1) + lc = size(c,1) + iwsz = 2*m*n + allocate(work(iwsz)) + + call dcsmm(trans_,m,n,k,alpha,& + & a%pl,a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pr,& + & b,lb,beta,c,lc,work,iwsz,info) + + deallocate(work) + call psb_erractionrestore(err_act) + + if(info.ne.0) then + if (err_act.eq.act_abort) then + call psb_error() + return + end if + end if + + return + +end subroutine psb_dcsmm diff --git a/src/serial/psb_dcsmv.f90 b/src/serial/psb_dcsmv.f90 new file mode 100644 index 00000000..14892736 --- /dev/null +++ b/src/serial/psb_dcsmv.f90 @@ -0,0 +1,58 @@ +! File: psb_dcsmv.f90 +! Subroutine: +! Parameters: + +subroutine psb_dcsmv(alpha,a,b,beta,c,info,trans) + use psb_spmat_type + implicit none + + type(psb_dspmat_type) :: a + real(kind(1.d0)) :: alpha, beta, b(:), c(:) + integer :: info + character, optional :: trans + + real(kind(1.d0)), allocatable :: work(:) + character :: trans_ + integer :: iwsz,m,n,k,lb,lc, err_act + character(len=20) :: name, ch_err + + name='psb_dcsmv' + info = 0 + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + endif + + if (trans_=='N') then + m = a%m + k = a%k + else + k = a%m + m = a%k + end if + n = 1 + lb = size(b,1) + lc = size(c,1) + iwsz = 2*m*n + allocate(work(iwsz)) + + call dcsmm(trans_,m,n,k,alpha,& + & a%pl,a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,a%pr,& + & b,lb,beta,c,lc,work,iwsz,info) + + deallocate(work) + call psb_erractionrestore(err_act) + + if(info.ne.0) then + if (err_act.eq.act_abort) then + call psb_error() + return + end if + end if + + return + +end subroutine psb_dcsmv diff --git a/src/serial/psb_dcsnmi.f90 b/src/serial/psb_dcsnmi.f90 new file mode 100644 index 00000000..2f05e671 --- /dev/null +++ b/src/serial/psb_dcsnmi.f90 @@ -0,0 +1,60 @@ +! File: psb_dcsnmi.f90 +! Subroutine: +! Parameters: + +real(kind(1.d0)) function psb_dcsnmi(a,info,trans) + + use psb_spmat_type + use psb_error_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer, intent(out) :: info + character, optional :: trans + + interface + real(kind(1.d0)) function dcsnmi(trans,m,n,fida,descra,a,ia1,ia2,& + & infoa,ierror) + integer :: m,n, ierror + character :: trans + integer :: ia1(*),ia2(*),infoa(*) + character :: descra*11, fida*5 + real(kind(1.d0)) :: a(*) + end function dcsnmi + end interface + + integer :: err_act + character :: itrans + character(len=20) :: name, ch_err + + name='psb_dcsnmi' + call psb_erractionsave(err_act) + + if(present(trans)) then + itrans=trans + else + itrans='N' + end if + + dcsnmi90 = dcsnmi(itrans,a%m,a%k,a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,info) + if(info/=0) then + dcsnmi90 = -1 + info=4010 + ch_err='dcsnmi' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end function psb_dcsnmi diff --git a/src/serial/psb_dcsprt.f90 b/src/serial/psb_dcsprt.f90 new file mode 100644 index 00000000..97e6888c --- /dev/null +++ b/src/serial/psb_dcsprt.f90 @@ -0,0 +1,111 @@ +! File: psb_dcsprt.f90 +! Subroutine: +! Parameters: + +!***************************************************************************** +!* * +!* Print out a matrix. * +!* Should really align with the F77 version under the SERIAL dir, which * +!* does a nice printout in MatrixMarket format; this would be a quick job. * +!* * +!* Handles both a shift in the row/col indices and a fuctional transform * +!* on the indices. * +!* * +!* * +!* * +!* * +!***************************************************************************** +subroutine psb_dcsprt(iout,a,iv,eirs,eics,head,ivr,ivc) + use psb_spmat_type + implicit none + + integer, intent(in) :: iout + type(psb_dspmat_type), intent(in) :: a + integer, intent(in), optional :: iv(:) + integer, intent(in), optional :: eirs,eics + character(len=*), optional :: head + integer, intent(in), optional :: ivr(:), ivc(:) + + character(len=*), parameter :: frmtr='(2(i6,1x),e16.8,2(i6,1x))' + integer :: irs,ics,i,j + + if (present(eirs)) then + irs = eirs + else + irs = 0 + endif + if (present(eics)) then + ics = eics + else + ics = 0 + endif + + if (present(head)) then + write(iout,'(a)') head + endif + + if (a%fida=='CSR') then + + write(iout,*) a%m,a%k,a%ia2(a%m+1)-1 + + if (present(iv)) then + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + write(iout,frmtr) iv(irs+i),iv(ics+a%ia1(j)),a%aspk(j) + enddo + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + write(iout,frmtr) ivr(irs+i),(ics+a%ia1(j)),a%aspk(j) + enddo + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + write(iout,frmtr) ivr(irs+i),ivc(ics+a%ia1(j)),a%aspk(j) + enddo + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + write(iout,frmtr) (irs+i),ivc(ics+a%ia1(j)),a%aspk(j) + enddo + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + write(iout,frmtr) (irs+i),(ics+a%ia1(j)),a%aspk(j) + enddo + enddo + endif + endif + + else if (a%fida=='COO') then + + if (present(ivr).and..not.present(ivc)) then + write(iout,*) a%m,a%k,a%infoa(nnz_) + do j=1,a%infoa(nnz_) + write(iout,frmtr) ivr(a%ia1(j)),a%ia2(j),a%aspk(j) + enddo + else if (present(ivr).and.present(ivc)) then + write(iout,*) a%m,a%k,a%infoa(nnz_) + do j=1,a%infoa(nnz_) + write(iout,frmtr) ivr(a%ia1(j)),ivc(a%ia2(j)),a%aspk(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + write(iout,*) a%m,a%k,a%infoa(nnz_) + do j=1,a%infoa(nnz_) + write(iout,frmtr) a%ia1(j),ivc(a%ia2(j)),a%aspk(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + write(iout,*) a%m,a%k,a%infoa(nnz_) + do j=1,a%infoa(nnz_) + write(iout,frmtr) a%ia1(j),a%ia2(j),a%aspk(j) + enddo + endif + else + write(0,*) 'Feeling lazy today, format not implemented: "',a%fida,'"' + endif +end subroutine psb_dcsprt diff --git a/src/serial/psb_dcsrws.f90 b/src/serial/psb_dcsrws.f90 new file mode 100644 index 00000000..4745ef83 --- /dev/null +++ b/src/serial/psb_dcsrws.f90 @@ -0,0 +1,73 @@ +! File: psb_dcsrws.f90 +! Subroutine: +! Parameters: + +subroutine psb_dcsrws(rw,a,info,trans) + use psb_spmat_type + use psb_error_mod + implicit none + + type(psb_dspmat_type) :: a + real(kind(1.d0)), pointer :: rw(:) + integer :: info + character, optional :: trans + + Interface dcsrws + subroutine dcsrws(trans,m,n,fida,descra,a,ia1,ia2,& + & infoa,rowsum,ierror) + integer, intent(in) :: m,n + integer, intent(out) :: ierror + double precision, intent(in) :: a(*) + double precision, intent(out) :: rowsum(*) + integer, intent(in) :: ia1(*), ia2(*), infoa(*) + character, intent(in) :: descra*11,fida*5,trans*1 + end subroutine dcsrws + end interface + + character :: trans_ + integer :: iwsz,m,n,k,lb,lc,err_act + character(len=20) :: name, ch_err + + name='psb_dcsrws' + info = 0 + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + endif + + if (trans_=='N') then + m = a%m + k = a%k + else + k = a%m + m = a%k + end if + + if (size(rw) < m) then + call psb_realloc(m,rw,info) + if (info /= 0) then + info = 4000 + call psb_errpush(info,name) + goto 9999 + end if + end if + + call dcsrws(trans,m,k,a%fida,a%descra,& + & a%aspk,a%ia1,a%ia2,a%infoa,rw,info) + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dcsrws diff --git a/src/serial/psb_dcssm.f90 b/src/serial/psb_dcssm.f90 new file mode 100644 index 00000000..120555fd --- /dev/null +++ b/src/serial/psb_dcssm.f90 @@ -0,0 +1,69 @@ +! File: psb_dcssm.f90 +! Subroutine: +! Parameters: + +subroutine psb_dcssm(alpha,t,b,beta,c,info,trans,unitd,d) + use psb_spmat_type + use psb_error_mod + implicit none + + type(psb_dspmat_type) :: t + real(kind(1.d0)) :: alpha, beta, b(:,:), c(:,:) + integer :: info + character, optional :: trans, unitd + real(kind(1.d0)), optional, target :: d(:) + + real(kind(1.d0)), allocatable :: work(:) + real(kind(1.d0)), pointer :: ddl(:) + character :: lt, lu + integer :: iwsz,m,n,lb,lc,err_act + character(len=20) :: name, ch_err + + name='psb_dcssm' + info = 0 + call psb_erractionsave(err_act) + + + if (present(trans)) then + lt = trans + else + lt = 'N' + endif + if (present(unitd)) then + lu = unitd + else + lu = 'U' + endif + if (present(d)) then + ddl => d + else + allocate(ddl(1)) + endif + + m = t%m + n = min(size(b,2),size(c,2)) + lb = size(b,1) + lc = size(c,1) + iwsz = 2*m*n + allocate(work(iwsz)) + + call dcssm(lt,m,n,alpha,lu,ddl,& + & t%pl,t%fida,t%descra,t%aspk,t%ia1,t%ia2,t%infoa,t%pr,& + & b,lb,beta,c,lc,work,iwsz,info) + + if (.not.present(d)) then + deallocate(ddl) + endif + deallocate(work) + call psb_erractionrestore(err_act) + + if(info.ne.0) then + if (err_act.eq.act_abort) then + call psb_error() + return + end if + end if + + return + +end subroutine psb_dcssm diff --git a/src/serial/psb_dcssv.f90 b/src/serial/psb_dcssv.f90 new file mode 100644 index 00000000..6b853d25 --- /dev/null +++ b/src/serial/psb_dcssv.f90 @@ -0,0 +1,69 @@ +! File: psb_dcssv.f90 +! Subroutine: +! Parameters: + +subroutine psb_dcssv(alpha,t,b,beta,c,info,trans,unitd,d) + use psb_spmat_type + use psb_error_mod + implicit none + + type(psb_dspmat_type) :: t + real(kind(1.d0)) :: alpha, beta, b(:), c(:) + integer :: info + character, optional :: trans, unitd + real(kind(1.d0)), optional, target :: d(:) + + real(kind(1.d0)), allocatable :: work(:) + real(kind(1.d0)), pointer :: ddl(:) + character :: lt, lu + integer :: iwsz,m,n,lb,lc,err_act + character(len=20) :: name, ch_err + + name='psb_dcssv' + info = 0 + call psb_erractionsave(err_act) + + + if (present(trans)) then + lt = trans + else + lt = 'N' + endif + if (present(unitd)) then + lu = unitd + else + lu = 'U' + endif + if (present(d)) then + ddl => d + else + allocate(ddl(1)) + endif + + m = t%m + n = 1 + lb = size(b,1) + lc = size(c,1) + iwsz = 2*m*n + allocate(work(iwsz)) + + call dcssm(lt,m,n,alpha,lu,ddl,& + & t%pl,t%fida,t%descra,t%aspk,t%ia1,t%ia2,t%infoa,t%pr,& + & b,lb,beta,c,lc,work,iwsz,info) + + if (.not.present(d)) then + deallocate(ddl) + endif + deallocate(work) + call psb_erractionrestore(err_act) + + if(info.ne.0) then + if (err_act.eq.act_abort) then + call psb_error() + return + end if + end if + + return + +end subroutine psb_dcssv diff --git a/src/serial/psb_dfixcoo.f90 b/src/serial/psb_dfixcoo.f90 new file mode 100644 index 00000000..0d95a8b5 --- /dev/null +++ b/src/serial/psb_dfixcoo.f90 @@ -0,0 +1,74 @@ +! File: psb_dfixcoo.f90 +! Subroutine: +! Parameters: + +Subroutine psb_dfixcoo(A,INFO) + use psb_spmat_type + implicit none + + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(out) :: info + + integer, allocatable :: iaux(:) + !locals + Integer :: nza, nzl,iret + integer :: i,j, irw, icl + logical, parameter :: debug=.false. + + info = 0 + if(debug) write(0,*)'fixcoo: ',size(a%ia1),size(a%ia2) + if (a%fida /= 'COO') then + write(0,*) 'Fixcoo Invalid input ',a%fida + info = -1 + return + end if + + nza = a%infoa(nnz_) + if (nza < 2) return + + allocate(iaux(nza+2),stat=info) + if (info /= 0) return + + call mrgsrt(nza,a%ia1,iaux,iret) + if (iret.eq.0) call reordvn(nza,a%aspk,a%ia1,a%ia2,iaux) + i = 1 + j = i + do while (i.le.nza) + do while ((a%ia1(j).eq.a%ia1(i))) + j = j+1 + if (j > nza) exit + enddo + nzl = j - i + call mrgsrt(nzl,a%ia2(i:i+nzl-1),iaux,iret) + if (iret.eq.0) & + & call reordvn(nzl,a%aspk(i:i+nzl-1),a%ia1(i:i+nzl-1),a%ia2(i:i+nzl-1),iaux) + i = j + enddo + + i = 1 + irw = a%ia1(i) + icl = a%ia2(i) + j = 1 + do + j = j + 1 + if (j > nza) exit + if ((a%ia1(j) == irw).and.(a%ia2(j) == icl)) then + a%aspk(i) = a%aspk(i) + a%aspk(j) + else + i = i+1 + a%aspk(i) = a%aspk(j) + a%ia1(i) = a%ia1(j) + a%ia2(i) = a%ia2(j) + irw = a%ia1(i) + icl = a%ia2(i) + endif + enddo + a%infoa(nnz_) = i + a%infoa(srtd_) = isrtdcoo + + if(debug) write(0,*)'FIXCOO: end second loop' + + deallocate(iaux) + return +end Subroutine psb_dfixcoo diff --git a/src/serial/psb_dipcoo2csr.f90 b/src/serial/psb_dipcoo2csr.f90 new file mode 100644 index 00000000..f84f1965 --- /dev/null +++ b/src/serial/psb_dipcoo2csr.f90 @@ -0,0 +1,158 @@ +! File: psb_dipcoo2csr.f90 +! Subroutine: +! Parameters: + +subroutine psb_dipcoo2csr(a,info,rwshr) + use psb_spmat_type + use psb_serial_mod, only : fixcoo + use psb_error_mod + implicit none + + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(out) :: info + logical, optional :: rwshr + + integer, pointer :: iaux(:), itemp(:) + !locals + logical :: rwshr_ + Integer :: nza, nr, i,j,irw, idl,err_act + Integer, Parameter :: maxtry=8 + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + name='psb_ipcoo2csr' + info = 0 + call psb_erractionsave(err_act) + + if(debug) write(0,*)'Inside ipcoo2csr',a%fida,a%m + if (a%fida /= 'COO') then + write(0,*) 'ipcoo2csr Invalid input ',a%fida + info = -1 + call psb_errpush(info,name) + goto 9999 + end if + if (present(rwshr)) then + rwshr_ = rwshr + else + rwshr_ = .false. + end if + + call psb_fixcoo(a,info) + nr = a%m + nza = a%infoa(nnz_) + allocate(iaux(nr+1)) + if(debug) write(0,*)'DIPCOO2CSR: out of fixcoo',nza,nr,size(a%ia2),size(iaux) + + itemp => a%ia1 + a%ia1 => a%ia2 + a%ia2 => iaux + + ! + ! This routine can be used in two modes: + ! 1. Normal: just look at the row indices and trust them. This + ! implies putting in empty rows where needed. In this case you + ! can get in trouble if A%M < A%IA1(NZA) + ! 2. Shrink mode: disregard the actual value of the row indices, + ! just treat them as ident markers. In this case you can get in + ! trouble when the number of distinct row indices is greater + ! than A%M + ! + ! + + a%ia2(1) = 1 + + if (nza <= 0) then + do i=1,nr + a%ia2(i+1) = a%ia2(i) + end do + else + + if (rwshr_) then + + + j = 1 + i = 1 + irw = itemp(j) + + do j=1, nza + if (itemp(j) /= irw) then + a%ia2(i+1) = j + irw = itemp(j) + i = i + 1 + if (i>nr) then + write(0,*) 'IPCOO2CSR: RWSHR=.true. : ',& + & i, nr,' Expect trouble!' + exit + end if + endif + enddo +! write(0,*) 'Exit from loop',j,nza,i + do + if (i>=nr+1) exit + a%ia2(i+1) = j + i = i + 1 + end do + + else + + if (nr < itemp(nza)) then + write(0,*) 'IPCOO2CSR: RWSHR=.false. : ',& + &nr,itemp(nza),' Expect trouble!' + end if + + + j = 1 + i = 1 + irw = itemp(j) + + outer: do + inner: do + if (i >= irw) exit inner + if (i>nr) then + write(0,*) 'Strange situation: i>nr ',i,nr,j,nza,irw,idl + exit outer + end if + a%ia2(i+1) = a%ia2(i) + i = i + 1 + end do inner + j = j + 1 + if (j > nza) exit + if (itemp(j) /= irw) then + a%ia2(i+1) = j + irw = itemp(j) + i = i + 1 + endif + enddo outer + ! + ! Cleanup empty rows at the end + ! + if (j /= (nza+1)) then + write(0,*) 'IPCOO2CSR : Problem from loop :',j,nza + endif + do + if (i>=nr+1) exit + a%ia2(i+1) = j + i = i + 1 + end do + + endif + + end if + +!!$ write(0,*) 'IPcoo2csr end loop ',i,nr,a%ia2(nr+1),nza + a%fida='CSR' + + deallocate(itemp) + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end Subroutine psb_dipcoo2csr diff --git a/src/serial/psb_dipcsr2coo.f90 b/src/serial/psb_dipcsr2coo.f90 new file mode 100644 index 00000000..bef665aa --- /dev/null +++ b/src/serial/psb_dipcsr2coo.f90 @@ -0,0 +1,65 @@ +! File: psb_dipcsr2coo.f90 +! Subroutine: +! Parameters: + +Subroutine psb_dipcsr2coo(a,info) + use psb_spmat_type + use psb_error_mod + implicit none + + !....Parameters... + Type(psb_dspmat_type), intent(inout) :: A + Integer, intent(out) :: info + + integer, pointer :: iaux(:), itemp(:) + !locals + Integer :: nza, nr + integer :: i,j,err_act + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + name='psb_dipcsr2coo' + info = 0 + call psb_erractionsave(err_act) + + if (a%fida /= 'CSR') then + info = 5 + call psb_errpush(info,name) + goto 9999 + end if + + nr = a%m + nza = a%ia2(nr+1) - 1 + allocate(iaux(nza),stat=info) + if (info /=0) then + write(0,*) 'Failed allocation ',info, nza + return + end if +!!$ write(0,*) 'ipcsr2coo ',a%m + itemp => a%ia2 + a%ia2 => a%ia1 + a%ia1 => iaux + + do i=1, nr + do j=itemp(i),itemp(i+1)-1 + a%ia1(j) = i + end do + end do + + a%fida='COO' + a%infoa(nnz_) = nza + a%infoa(srtd_) = isrtdcoo + deallocate(itemp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end Subroutine psb_dipcsr2coo diff --git a/src/serial/psb_dneigh.f90 b/src/serial/psb_dneigh.f90 new file mode 100644 index 00000000..211a4e71 --- /dev/null +++ b/src/serial/psb_dneigh.f90 @@ -0,0 +1,104 @@ +! File: psb_dneigh.f90 +! Subroutine: +! Parameters: + +subroutine psb_dneigh(a,idx,neigh,n,info,lev) + + use psb_realloc_mod + use psb_spmat_type + implicit none + + + type(psb_dspmat_type), intent(in) :: a ! the sparse matrix + integer, intent(in) :: idx ! the index whose neighbours we want to find + integer, intent(out) :: n, info ! the number of neighbours and the info + integer, pointer :: neigh(:) ! the neighbours + integer, optional :: lev ! level of neighbours to find + + + integer :: level, dim, i, j, k, r, c, brow,& + & elem_pt, ii, n1, col_idx, ne, err_act + integer, parameter :: izero=0 + character(len=20) :: name, ch_err + + name='psb_dneigh' + info = 0 + call psb_erractionsave(err_act) + + if ((a%fida /= 'CSR')) then + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + n = 0 + info = 0 + if(present(lev)) then + if(lev.le.2) then + level=lev + else + write(0,'("Too many levels!!!")') + return + endif + else + level=1 + end if + + if(a%fida.eq.'CSR') then + + dim=0 + if(level.eq.1) dim=(a%ia2(idx+1)-a%ia2(idx)) + if(dim >size(neigh)) call psb_realloc(dim,neigh,info) + if(info.ne.izero) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + n=0 + if(level.eq.1) then + do i=a%ia2(idx), a%ia2(idx+1)-1 + n=n+1 + neigh(n)=a%ia1(i) + end do + + else + + do i=a%ia2(idx), a%ia2(idx+1)-1 + + j=a%ia1(i) + if ((1<=j).and.(j<=a%m).and.(j.ne.idx)) then + + dim=dim+ a%ia2(j+1)-a%ia2(j) + if(dim.gt.size(neigh)) call psb_realloc(dim,neigh,info) + if(info.ne.izero) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + do k=a%ia2(j), a%ia2(j+1)-1 + n=n+1 + neigh(n)=a%ia1(k) + end do + end if + end do + + end if + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dneigh diff --git a/src/serial/psb_dnumbmm.f90 b/src/serial/psb_dnumbmm.f90 new file mode 100644 index 00000000..96a448df --- /dev/null +++ b/src/serial/psb_dnumbmm.f90 @@ -0,0 +1,21 @@ +! File: psb_dnumbmm.f90 +! Subroutine: +! Parameters: + +subroutine psb_dnumbmm(a,b,c) + use psb_spmat_type + implicit none + + type(psb_dspmat_type) :: a,b,c + real(kind(1.d0)), allocatable :: temp(:) + integer :: info + + allocate(temp(max(a%m,a%k,b%m,b%k)),stat=info) + + call psb_realloc(size(c%ia1),c%aspk,info) + call numbmm(a%m,a%k,b%k,a%ia2,a%ia1,0,a%aspk,& + & b%ia2,b%ia1,0,b%aspk,& + & c%ia2,c%ia1,0,c%aspk,temp) + deallocate(temp) + return +end subroutine psb_dnumbmm diff --git a/src/serial/psb_drwextd.f90 b/src/serial/psb_drwextd.f90 new file mode 100644 index 00000000..214b85ed --- /dev/null +++ b/src/serial/psb_drwextd.f90 @@ -0,0 +1,84 @@ +! File: psb_drwextd.f90 +! Subroutine: +! Parameters: + +subroutine psb_drwextd(nr,a,info,b) + use psb_spmat_type + use psb_error_mod + implicit none + + ! Extend matrix A up to NR rows with empty ones (i.e.: all zeroes) + integer, intent(in) :: nr + type(psb_dspmat_type), intent(inout) :: a + integer,intent(out) :: info + type(psb_dspmat_type), intent(in), optional :: b + integer :: i,j,ja,jb,err_act + character(len=20) :: name, ch_err + + name='psb_drwextd' + info = 0 + call psb_erractionsave(err_act) + + if (nr > a%m) then + + if (a%fida == 'CSR') then + call psb_realloc(nr+1,a%ia2,info) + if (present(b)) then + jb = b%ia2(b%m+1)-1 + call psb_realloc(size(a%ia1)+jb,a%ia1,info) + call psb_realloc(size(a%aspk)+jb,a%aspk,info) + do i=1, min(nr-a%m,b%m) + ! Should use spgtrow. + ! Don't care for the time being. + a%ia2(a%m+i+1) = a%ia2(a%m+i) + b%ia2(i+1) - b%ia2(i) + ja = a%ia2(a%m+i) + jb = b%ia2(i) + do + if (jb >= b%ia2(i+1)) exit + a%aspk(ja) = b%aspk(jb) + a%ia1(ja) = b%ia1(jb) + ja = ja + 1 + jb = jb + 1 + end do + end do + do j=i,nr-a%m + a%ia2(a%m+i+1) = a%ia2(a%m+i) + end do + + else + do i=a%m+2,nr+1 + a%ia2(i) = a%ia2(i-1) + end do + end if + a%m = nr + else if (a%fida == 'COO') then + if (present(b)) then + else + endif + a%m = nr + else if (a%fida == 'JAD') then + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_drwextd diff --git a/src/serial/psb_dspgtdiag.f90 b/src/serial/psb_dspgtdiag.f90 new file mode 100644 index 00000000..e5988c1c --- /dev/null +++ b/src/serial/psb_dspgtdiag.f90 @@ -0,0 +1,73 @@ +! File: psb_dspgtdiag.f90 +! Subroutine: +! Parameters: + +!***************************************************************************** +!* * +!* Takes a specified row from matrix A and copies into matrix B (possibly * +!* appending to B). Output is always COO. Input might be anything, once * +!* we get to actually write the code..... * +!* * +!***************************************************************************** +subroutine psb_dspgtdiag(a,d,info) + ! Output is always in COO format into B, irrespective of + ! the input format + use psb_spmat_type + use psb_error_mod + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + real(kind(1.d0)), intent(inout) :: d(:) + integer, intent(out) :: info + + integer :: i,j,k,nr, nz, err_act + character(len=20) :: name, ch_err + + name='psb_dspgtdiag' + info = 0 + call psb_erractionsave(err_act) + + if (size(d) < min(a%k,a%m)) then + write(0,*) 'Insufficient space in DSPGTDIAG ', size(d),min(a%m,a%k) + end if + d(:) = 0.d0 + if (a%fida == 'CSR') then + + do i=1, min(a%m,a%k) + do j=a%ia2(i),a%ia2(i+1)-1 + if (a%ia1(j) == i) then + d(i) = a%aspk(j) + end if + end do + end do + + else if (a%fida == 'COO') then + + do i=1,a%infoa(nnz_) + j=a%ia1(i) + if ((j==a%ia2(i)).and.(j <= min(a%k,a%m)) .and.(j>0)) then + d(j) = a%aspk(i) + endif + enddo + + else if (a%fida == 'JAD') then + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dspgtdiag + diff --git a/src/serial/psb_dspgtrow.f90 b/src/serial/psb_dspgtrow.f90 new file mode 100644 index 00000000..102abeb9 --- /dev/null +++ b/src/serial/psb_dspgtrow.f90 @@ -0,0 +1,312 @@ +! File: psb_dspgtrow.f90 +! Subroutine: psb_dspgtrow +! Gets one or more rows from a sparse matrix. +! Parameters: + +!***************************************************************************** +!* * +!* Takes a specified row from matrix A and copies into matrix B (possibly * +!* appending to B). Output is always COO. Input might be anything, once * +!* we get to actually write the code..... * +!* * +!***************************************************************************** +subroutine psb_dspgtrow(irw,a,b,info,append,iren,lrw) + ! Output is always in COO format into B, irrespective of + ! the input format + use psb_spmat_type + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer, intent(in) :: irw + type(psb_dspmat_type), intent(inout) :: b + integer,intent(out) :: info + logical, intent(in), optional :: append + integer, intent(in), target, optional :: iren(:) + integer, intent(in), optional :: lrw + + logical :: append_ + integer, pointer :: iren_(:) + integer :: i,j,k,ip,jp,nr,idx, nz,iret,nzb, nza, lrw_, irw_, err_act + character(len=20) :: name, ch_err + + name='psb_dspgtrow' + info = 0 + call psb_erractionsave(err_act) + + irw_ = irw + if (present(lrw)) then + lrw_ = lrw + else + lrw_ = irw + endif + if (lrw_ < irw) then + write(0,*) 'SPGTROW input error: fixing lrw',irw,lrw_ + lrw_ = irw + end if + if (present(append)) then + append_=append + else + append_=.false. + endif + if (present(iren)) then + iren_=>iren + else + iren_ => null() + end if + + + if (append_) then + nzb = b%infoa(nnz_) + else + nzb = 0 + b%m = 0 + b%k = 0 + endif + + if (a%fida == 'CSR') then + call csr_dspgtrow(irw_,a,b,append_,iren_,lrw_) + + else if (a%fida == 'COO') then + call coo_dspgtrow(irw_,a,b,append_,iren_,lrw_) + + else if (a%fida == 'JAD') then + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + + subroutine csr_dspgtrow(irw,a,b,append,iren,lrw) + + use psb_spmat_type + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer :: irw + type(psb_dspmat_type), intent(inout) :: b + logical, intent(in) :: append + integer, pointer :: iren(:) + integer :: lrw + + integer :: idx,i,j ,nr,nz,nzb + + if (a%pl(1) /= 0) then + write(0,*) 'Fatal error in SPGTROW: do not feed a permuted mat so far!',& + & a%pl(1) + idx = -1 + else + idx = irw + endif +!!$ write(0,*) 'csr_gtrow: ',irw,lrw,a%pl(1),idx + if (idx<0) then + write(0,*) ' spgtrow Error : idx no good ',idx + return + end if + nr = lrw - irw + 1 + nz = a%ia2(idx+nr) - a%ia2(idx) + if (append) then + nzb = b%infoa(nnz_) + else + nzb = 0 + endif + if (min(size(b%ia1),size(b%ia2),size(b%aspk)) < nzb+nz) then + call psb_spreall(b,nzb+nz,iret) + endif + b%fida='COO' +!!$ write(0,*) 'csr_gtrow: ',out_,b%fida,nzb + if (associated(iren)) then + k=0 + do i=irw,lrw + do j=a%ia2(i),a%ia2(i+1)-1 + k = k + 1 + b%aspk(nzb+k) = a%aspk(j) + b%ia1(nzb+k) = iren(i) + b%ia2(nzb+k) = iren(a%ia1(j)) + end do + enddo + else + k=0 +!!$ write(0,*) 'csr_gtrow: ilp',irw,lrw + do i=irw,lrw +!!$ write(0,*) 'csr_gtrow: jlp',a%ia2(i),a%ia2(i+1)-1 + do j=a%ia2(i),a%ia2(i+1)-1 + k = k + 1 + b%aspk(nzb+k) = a%aspk(j) + b%ia1(nzb+k) = i + b%ia2(nzb+k) = a%ia1(j) +!!$ write(0,*) 'csr_gtrow: in:',a%aspk(j),i,a%ia1(j) + end do + enddo + end if + b%infoa(nnz_) = nzb+nz + if (a%pr(1) /= 0) then + write(0,*) 'Feeling lazy today, Right Permutation will have to wait' + endif + b%m = b%m+lrw-irw+1 + b%k = max(b%k,a%k) + + end subroutine csr_dspgtrow + + subroutine coo_dspgtrow(irw,a,b,append,iren,lrw) + + use psb_spmat_type + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer :: irw + type(psb_dspmat_type), intent(inout) :: b + logical, intent(in) :: append + integer, pointer :: iren(:) + integer :: lrw + + nza = a%infoa(nnz_) + if (a%pl(1) /= 0) then + write(0,*) 'Fatal error in SPGTROW: do not feed a permuted mat so far!' + idx = -1 + else + idx = irw + endif + if (idx<0) then + write(0,*) ' spgtrow Error : idx no good ',idx + return + end if + + if (a%infoa(srtd_) == isrtdcoo) then +!!$ write(0,*) 'Gtrow_: srtd coo',irw + ! In this case we can do a binary search. + do + call ibsrch(ip,irw,nza,a%ia1) + if (ip /= -1) exit + irw = irw + 1 + if (irw > lrw) then + write(0,*) 'Warning : did not find any rows. Is this an error?' + exit + end if + end do + + if (ip /= -1) then + ! expand [ip,jp] to contain all row entries. + do + if (ip < 2) exit + if (a%ia1(ip-1) == irw) then + ip = ip -1 + else + exit + end if + end do + + end if + + do + call ibsrch(jp,lrw,nza,a%ia1) + if (jp /= -1) exit + lrw = lrw - 1 + if (irw > lrw) then + write(0,*) 'Warning : did not find any rows. Is this an error?' + exit + end if + end do + + if (jp /= -1) then + ! expand [ip,jp] to contain all row entries. + do + if (jp == nza) exit + if (a%ia1(jp+1) == lrw) then + jp = jp + 1 + else + exit + end if + end do + end if + if ((ip /= -1) .and.(jp /= -1)) then + ! Now do the copy. + nz = jp - ip +1 + if (size(b%ia1) < nzb+nz) then + call psb_spreall(b,nzb+nz,iret) + endif + b%fida='COO' + if (associated(iren)) then + do i=ip,jp + nzb = nzb + 1 + b%aspk(nzb) = a%aspk(i) + b%ia1(nzb) = iren(a%ia1(i)) + b%ia2(nzb) = iren(a%ia2(i)) + enddo + else + do i=ip,jp + nzb = nzb + 1 + b%aspk(nzb) = a%aspk(i) + b%ia1(nzb) = a%ia1(i) + b%ia2(nzb) = a%ia2(i) + enddo + end if + end if + + else + + nz = (nza*(lrw-irw+1))/max(a%m,1) + + if (size(b%ia1) < nzb+nz) then + call psb_spreall(b,nzb+nz,iret) + endif + + if (associated(iren)) then + k = 0 + do i=1,a%infoa(nnz_) + if ((a%ia1(i)>=irw).and.(a%ia1(i)<=lrw)) then + k = k + 1 + if (k > nz) then + nz = k + call psb_spreall(b,nzb+nz,iret) + end if + b%aspk(nzb+k) = a%aspk(i) + b%ia1(nzb+k) = iren(a%ia1(i)) + b%ia2(nzb+k) = iren(a%ia2(i)) + endif + enddo + else + k = 0 + do i=1,a%infoa(nnz_) + if ((a%ia1(i)>=irw).and.(a%ia1(i)<=lrw)) then + k = k + 1 + if (k > nz) then + nz = k + call psb_spreall(b,nzb+nz,iret) + end if + b%aspk(nzb+k) = a%aspk(i) + b%ia1(nzb+k) = (a%ia1(i)) + b%ia2(nzb+k) = (a%ia2(i)) + endif + enddo + end if + end if + + b%infoa(nnz_) = nzb + k + b%m = b%m+lrw-irw+1 + b%k = max(b%k,a%k) + end subroutine coo_dspgtrow + +end subroutine psb_dspgtrow + diff --git a/src/serial/psb_dspinfo.f90 b/src/serial/psb_dspinfo.f90 new file mode 100644 index 00000000..82a93b09 --- /dev/null +++ b/src/serial/psb_dspinfo.f90 @@ -0,0 +1,139 @@ +! File: psb_dspinfo.f90 +! Subroutine: +! Parameters: + +!***************************************************************************** +!* * +!* Extract info from sparse matrix A. The required info is always a single * +!* integer. Input FIDA might be anything, once * +!* we get to actually write the code..... * +!* * +!***************************************************************************** +subroutine psb_dspinfo(ireq,a,ires,info,iaux) + use psb_spmat_type + use psb_const_mod + use psb_error_mod + implicit none + + type(psb_dspmat_type), intent(in) :: a + integer, intent(in) :: ireq + integer, intent(out) :: ires, info + integer, intent(in), optional :: iaux + + integer :: i,j,k,ip,jp,nr,irw,nz, err_act + character(len=20) :: name, ch_err + + name='psb_dspinfo' + info = 0 + call psb_erractionsave(err_act) + + + if (ireq == nztotreq) then + if (a%fida == 'CSR') then + nr = a%m + ires = a%ia2(nr+1)-1 + else if ((a%fida == 'COO').or.(a%fida == 'COI')) then + ires = a%infoa(nnz_) + else if (a%fida == 'JAD') then + ires=-1 + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else + ires=-1 + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + else if (ireq == nzrowreq) then + if (.not.present(iaux)) then + write(0,*) 'Need IAUX when ireq=nzrowreq' + ires=-1 + return + endif + irw = iaux + if (a%fida == 'CSR') then + ires = a%ia2(irw+1)-a%ia2(irw) + else if ((a%fida == 'COO').or.(a%fida == 'COI')) then + + if (a%infoa(srtd_) == isrtdcoo) then +!!$ write(0,*) 'Gtrow_: srtd coo',irw + ! In this case we can do a binary search. + nz = a%infoa(nnz_) + call ibsrch(ip,irw,nz,a%ia1) + jp = ip + ! expand [ip,jp] to contain all row entries. + do + if (ip < 2) exit + if (a%ia1(ip-1) == irw) then + ip = ip -1 + else + exit + end if + end do + + do + if (jp > nz) exit + if (a%ia1(jp) == irw) then + jp =jp + 1 + else + exit + endif + end do + ires = jp-ip + else + ires = count(a%ia1(1:a%infoa(nnz_))==irw) + endif +!!$ ires = 0 +!!$ do i=1, a%infoa(nnz_) +!!$ if (a%ia1(i) == irw) ires = ires + 1 +!!$ enddo + else if (a%fida == 'JAD') then + ires=-1 + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else + ires=-1 + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + else if (ireq == nzsizereq) then + if (a%fida == 'CSR') then + ires = size(a%aspk) + else if ((a%fida == 'COO').or.(a%fida == 'COI')) then + ires = size(a%aspk) + else if (a%fida == 'JAD') then + ires = a%infoa(nnz_) + else + ires=-1 + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + else + write(0,*) 'Unknown request into SPINFO' + ires=-1 + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dspinfo diff --git a/src/serial/psb_dspscal.f90 b/src/serial/psb_dspscal.f90 new file mode 100644 index 00000000..cdfe009f --- /dev/null +++ b/src/serial/psb_dspscal.f90 @@ -0,0 +1,67 @@ +! File: psb_dspscal.f90 +! Subroutine: +! Parameters: + +!***************************************************************************** +!* * +!* * +!***************************************************************************** +subroutine psb_dspscal(a,d,info) + ! the input format + use psb_spmat_type + use psb_error_mod + use psb_const_mod + implicit none + + type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + real(kind(1.d0)), intent(in) :: d(:) + + integer :: i,j,k,nr, nz,err_act + character(len=20) :: name, ch_err + + name='psb_dspscal' + info = 0 + call psb_erractionsave(err_act) + + + if (a%fida == 'CSR') then + + do i=1, a%m + do j=a%ia2(i),a%ia2(i+1)-1 + a%aspk(j) = a%aspk(j) * d(i) + end do + end do + + else if (a%fida == 'COO') then + + do i=1,a%infoa(nnz_) + j=a%ia1(i) + a%aspk(i) = a%aspk(i) * d(j) + enddo + + else if (a%fida == 'JAD') then + info=135 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + else + info=136 + ch_err=a%fida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dspscal + diff --git a/src/serial/psb_dsymbmm.f90 b/src/serial/psb_dsymbmm.f90 new file mode 100644 index 00000000..527b10ac --- /dev/null +++ b/src/serial/psb_dsymbmm.f90 @@ -0,0 +1,39 @@ +! File: psb_dsymbmm.f90 +! Subroutine: +! Parameters: + +subroutine psb_dsymbmm(a,b,c) + use psb_spmat_type + implicit none + + type(psb_dspmat_type) :: a,b,c + integer, allocatable :: itemp(:) + integer :: nze,info + + interface + subroutine symbmm (n, m, l, ia, ja, diaga, & + & ib, jb, diagb, ic, jc, diagc, index) + integer n,m,l, ia(*), ja(*), diaga, ib(*), jb(*), diagb,& + & diagc, index(*) + integer, pointer :: ic(:),jc(:) + end subroutine symbmm + end interface + + if (b%m /= a%k) then + write(0,*) 'Mismatch in SYMBMM: ',a%m,a%k,b%m,b%k + endif + allocate(itemp(max(a%m,a%k,b%m,b%k)),stat=info) + nze = max(a%m+1,2*a%m) + call psb_spreall(c,nze,info) +!!$ write(0,*) 'SYMBMM90 ',size(c%pl),size(c%pr) + call symbmm(a%m,a%k,b%k,a%ia2,a%ia1,0,& + & b%ia2,b%ia1,0,& + & c%ia2,c%ia1,0,itemp) + c%pl(1) = 0 + c%pr(1) = 0 + c%m=a%m + c%k=b%k + c%fida='CSR' + deallocate(itemp) + return +end subroutine psb_dsymbmm diff --git a/src/serial/psb_dtransp.f90 b/src/serial/psb_dtransp.f90 new file mode 100644 index 00000000..6511b91d --- /dev/null +++ b/src/serial/psb_dtransp.f90 @@ -0,0 +1,59 @@ +! File: psb_dtransp.f90 +! Subroutine: +! Parameters: + +subroutine psb_dtransp(a,b,c,fmt) + use psb_spmat_type + use psb_serial_mod, only : ipcoo2csr, ipcsr2coo, fixcoo + implicit none + + type(psb_dspmat_type) :: a,b + integer, optional :: c + character(len=*), optional :: fmt + + character(len=5) :: fmt_ + integer ::c_, info, nz + integer, pointer :: itmp(:)=>null() + if (present(c)) then + c_=c + else + c_=1 + endif + if (present(fmt)) then + fmt_ = fmt + else + fmt_='CSR' + endif + if (associated(b%aspk)) call psb_spfree(b,info) + call psb_spclone(a,b,info) + + if (b%fida=='CSR') then + call psb_ipcsr2coo(b,info) + else if (b%fida=='COO') then + ! do nothing + else + write(0,*) 'Unimplemented case in TRANSP ' + endif +!!$ nz = b%infoa(nnz_) +!!$ write(0,*) 'TRANSP CHECKS:',a%m,a%k,& +!!$ &minval(b%ia1(1:nz)),maxval(b%ia1(1:nz)),& +!!$ &minval(b%ia2(1:nz)),maxval(b%ia2(1:nz)) + itmp => b%ia1 + b%ia1 => b%ia2 + b%ia2 => itmp + + b%m = a%k + b%k = a%m +!!$ write(0,*) 'Calling IPCOO2CSR from transp90 ',b%m,b%k + if (fmt_=='CSR') then + call psb_ipcoo2csr(b,info) + b%fida='CSR' + else if (fmt_=='COO') then + call psb_fixcoo(b,info) + b%fida='COO' + else + write(0,*) 'Unknown FMT in TRANSP : "',fmt_,'"' + endif + + return +end subroutine psb_dtransp diff --git a/src/serial/psbdcoins.f90 b/src/serial/psbdcoins.f90 new file mode 100644 index 00000000..a2a6319c --- /dev/null +++ b/src/serial/psbdcoins.f90 @@ -0,0 +1,220 @@ +! File: psbdcoins.f90 + ! Subroutine: + ! Parameters: +subroutine psb_dcoins(nz,ia,ja,val,a,gtl,imin,imax,jmin,jmax,info) + use typesp + use tools_const + use realloc + use string + use errormod + use f90serial, only : spinfo + implicit none + + integer, intent(in) :: nz, imin,imax,jmin,jmax + integer, intent(in) :: ia(:),ja(:),gtl(:) + real(kind(1.d0)), intent(in) :: val(:) + type(d_spmat), intent(inout) :: a + integer, intent(out) :: info + + character(len=5) :: ufida + integer :: i,j,ir,ic,nr,nc, ng, nza, isza,spstate, nnz,& + & ip1, nzl, err_act, int_err(5) + integer, parameter :: izero=0 + logical, parameter :: debug=.true. + character(len=20) :: name, ch_err + + name='psbdcoins' + info = 0 + call psb_erractionsave(err_act) + + info = 0 + if (nz <= 0) then + info = 10 + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = 35 + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = 35 + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = 35 + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + +!!$ ufida = toupper(a%fida) + call touppers(a%fida,ufida) + ng = size(gtl) + spstate = a%infoa(state_) + + select case(spstate) + case(spmat_bld) + if ((ufida /= 'COO').and.(ufida/='COI')) then + info = 134 + ch_err(1:3)=ufida(1:3) + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call spinfo(nztotreq,a,nza,info) + call spinfo(nzsizereq,a,isza,info) + if(info.ne.izero) then + info=4010 + ch_err='spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + if ((nza+nz)>isza) then + call spreall(a,nza+nz,info) + if(info.ne.izero) then + info=4010 + ch_err='spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + endif + call psb_inner_ins(nz,ia,ja,val,nza,a%ia1,a%ia2,a%aspk,gtl,& + & imin,imax,jmin,jmax,info) + if(info.ne.izero) then + info=4010 + ch_err='psb_inner_ins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + if (debug) then + if ((nza - a%infoa(nnz_)) /= nz) then + write(0,*) 'PSB_COINS: insert discarded items ' + end if + end if + if ((nza - a%infoa(nnz_)) /= nz) then + a%infoa(del_bnd_) = nza + endif + a%infoa(nnz_) = nza + + case(spmat_upd) + + if (ibits(a%infoa(upd_),2,1).eq.1) then + ip1 = a%infoa(upd_pnt_) + nza = a%ia2(ip1+nnz_) + nzl = a%infoa(del_bnd_) + + call psb_inner_upd(nz,ia,ja,val,nza,a%aspk,gtl,& + & imin,imax,jmin,jmax,nzl,info) + if(info.ne.izero) then + info=4010 + ch_err='psb_inner_upd' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif +!!$ if (debug) then +!!$ if ((nza - a%ia2(ip1+nnz_)) /= nz) then +!!$ write(0,*) 'PSB_COINS: update discarded items ' +!!$ end if +!!$ end if + + a%ia2(ip1+nnz_) = nza + else + info = 2231 + goto 9999 + endif + + case default + info = 2232 + call psb_errpush(info,name) + goto 9999 + end select + return + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +contains + subroutine psb_inner_upd(nz,ia,ja,val,nza,aspk,gtl,imin,imax,jmin,jmax,nzl,info) + implicit none + + integer, intent(in) :: nz, imin,imax,jmin,jmax,nzl + integer, intent(in) :: ia(*),ja(*),gtl(*) + integer, intent(inout) :: nza + real(kind(1.d0)), intent(in) :: val(*) + real(kind(1.d0)), intent(inout) :: aspk(*) + integer, intent(out) :: info + integer :: i,ir,ic + + info = 0 + + if (nza >= nzl) then + do i=1, nz + nza = nza + 1 + a%aspk(nza) = val(i) + end do + else + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir >=1).and.(ir<=ng).and.(ic>=1).and.(ic<=ng)) then + ir = gtl(ir) + ic = gtl(ic) + if ((ir >=imin).and.(ir<=imax).and.(ic>=jmin).and.(ic<=jmax)) then + nza = nza + 1 + a%aspk(nza) = val(i) + end if + end if + end do + end if + + end subroutine psb_inner_upd + + subroutine psb_inner_ins(nz,ia,ja,val,nza,ia1,ia2,aspk,gtl,& + & imin,imax,jmin,jmax,info) + implicit none + + integer, intent(in) :: nz, imin,imax,jmin,jmax + integer, intent(in) :: ia(*),ja(*),gtl(*) + integer, intent(inout) :: nza,ia1(*),ia2(*) + real(kind(1.d0)), intent(in) :: val(*) + real(kind(1.d0)), intent(inout) :: aspk(*) + integer, intent(out) :: info + + integer :: i,ir,ic + + info = 0 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir >=1).and.(ir<=ng).and.(ic>=1).and.(ic<=ng)) then + ir = gtl(ir) + ic = gtl(ic) + if ((ir >=imin).and.(ir<=imax).and.(ic>=jmin).and.(ic<=jmax)) then + nza = nza + 1 + a%ia1(nza) = ir + a%ia2(nza) = ic + a%aspk(nza) = val(i) + end if + end if + end do + + end subroutine psb_inner_ins +end subroutine psb_dcoins + diff --git a/src/serial/string_impl.f90 b/src/serial/string_impl.f90 new file mode 100644 index 00000000..7928a4c0 --- /dev/null +++ b/src/serial/string_impl.f90 @@ -0,0 +1,49 @@ +function tolowerc(string) + character(len=*), intent(in) :: string + character(len=len(string)) :: tolowerc + character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz' + character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + integer :: i,k + + do i=1,len(string) + k = index(ucase,string(i:i)) + if (k /=0 ) then + tolowerc(i:i) = lcase(k:k) + else + tolowerc(i:i) = string(i:i) + end if + enddo +end function tolowerc +function toupperc(string) + character(len=*), intent(in) :: string + character(len=len(string)) :: toupperc + character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz' + character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + integer :: i,k + + do i=1,len(string) + k = index(lcase,string(i:i)) + if (k /=0 ) then + toupperc(i:i) = ucase(k:k) + else + toupperc(i:i) = string(i:i) + end if + enddo +end function toupperc + +subroutine sub_toupperc(string,strout) + character(len=*), intent(in) :: string + character(len=*), intent(out) :: strout + character(len=*), parameter :: lcase='abcdefghijklmnopqrstuvwxyz' + character(len=*), parameter :: ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ' + integer :: i,k + + do i=1,len(string) + k = index(lcase,string(i:i)) + if (k /=0 ) then + strout(i:i) = ucase(k:k) + else + strout(i:i) = string(i:i) + end if + enddo +end subroutine sub_toupperc diff --git a/src/tools/Makefile b/src/tools/Makefile new file mode 100644 index 00000000..851a2681 --- /dev/null +++ b/src/tools/Makefile @@ -0,0 +1,24 @@ +include ../../Make.inc + +FOBJS = psb_dallc.o psb_dasb.o psb_dcsrp.o psb_descprt.o \ + psb_dfree.o psb_dgelp.o psb_dins.o psb_dptins.o \ + psb_dscall.o psb_dscalv.o psb_dscasb.o psb_dsccpy.o \ + psb_dscdec.o psb_dscfree.o psb_dscins.o psb_dscov.o \ + psb_dscren.o psb_dscrep.o psb_dspalloc.o psb_dspasb.o \ + psb_dspcnv.o psb_dspfree.o psb_dspins.o psb_dsprn.o \ + psb_dspupdate.o psb_glob_to_loc.o psb_ialloc.o psb_iasb.o \ + psb_ifree.o psb_iins.o psb_loc_to_glob.o psb_ptasb.o + +MPFOBJS = psb_descasb.o psb_dcsrovr.o + +INCDIRS = ../../lib . + +lib: mpfobjs $(FOBJS) + + +mpfobjs: + (make $(MPFOBJS) F90="$(MPF90)" FC="$(MPF90)" FCOPT="$(F90COPT)") + + +clean: + /bin/rm -f $(MPFOBJS) $(FOBJS) diff --git a/src/tools/psb_dallc.f90 b/src/tools/psb_dallc.f90 new file mode 100644 index 00000000..efd73427 --- /dev/null +++ b/src/tools/psb_dallc.f90 @@ -0,0 +1,291 @@ +! File: psb_dallc.f90 +! +! Function: psb_dalloc +! Allocates dense matrix for PSBLAS routines +! +! Parameters: +! m - number of rows. +! n - number of columns. +! x - the matrix to be allocated. +! desc_a - the communication descriptor. +! info - eventually returns an error code +! js - (optional) the starting column +subroutine psb_dalloc(m, n, x, desc_a, info, js) + !....allocate dense matrix for psblas routines..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + + implicit none + + !....parameters... + integer, intent(in) :: m,n + real(kind(1.d0)), pointer :: x(:,:) + type(psb_desc_type), intent(inout) :: desc_a + integer :: info + integer, optional, intent(in) :: js + + !locals + integer :: nprow,npcol,myrow,mycol,err,n_col,n_row,i,j,jj,err_act + integer :: icontxt,dectype + integer :: int_err(5),temp(1),exch(3) + real(kind(1.d0)) :: real_err(5) + integer, allocatable:: prc_v(:) + character(len=20) :: name, ch_err + + info=0 + err=0 + int_err(1)=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + dectype=desc_a%matrix_data(psb_dec_type_) + !... check m and n parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (n.lt.0) then + info = 10 + int_err(1) = 2 + int_err(2) = n + call psb_errpush(info,name,int_err) + else if (.not.psb_is_ok_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + else if (m.ne.desc_a%matrix_data(psb_n_)) then + info = 300 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = psb_n_ + int_err(5) = desc_a%matrix_data(psb_n_) + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (present(js)) then + j=js + else + j=1 + endif + !global check on m and n parameters + if (myrow.eq.root) then + exch(1)=m + exch(2)=n + exch(3)=j + call igebs2d(icontxt,psb_all_,psb_topdef_, ithree,ione, exch, ithree) + else + call igebr2d(icontxt,psb_all_,psb_topdef_, ithree,ione, exch, ithree, root, 0) + if (exch(1).ne.m) then + info=550 + int_err(1)=1 + call psb_errpush(info,name,int_err) + goto 9999 + else if (exch(2).ne.n) then + info=550 + int_err(1)=2 + call psb_errpush(info,name,int_err) + goto 9999 + else if (exch(3).ne.j) then + info=550 + int_err(1)=3 + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + !....allocate x ..... + if (psb_is_asb_dec(dectype).or.psb_is_upd_dec(dectype)) then + n_col = max(1,desc_a%matrix_data(psb_n_col_)) + allocate(x(n_col,j:j+n-1),stat=info) +! call sprealloc(n_col,j:j+n-1,x,info) + if (info.ne.0) then + info=4010 + ch_err='psb_sprealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + do jj=j,j+n-1 + do i=1,n_col + x(i,j) = 0.0d0 + end do + end do + else if (psb_is_bld_dec(dectype)) then + n_row = max(1,desc_a%matrix_data(psb_n_row_)) + allocate(x(n_row,j:j+n-1),stat=info) +! call sprealloc(n_row,j:j+n-1,x,info) + if (info.ne.0) then + info=4010 + ch_err='psb_sprealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + do jj=j,j+n-1 + do i=1,n_row + x(i,j) = 0.0d0 + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dalloc + + + +! Function: psb_dallocv +! Allocates dense matrix for PSBLAS routines +! +! Parameters: +! m - number of rows. +! x - the matrix to be allocated. +! desc_a - the communication descriptor. +! info - eventually returns an error code +subroutine psb_dallocv(m, x, desc_a,info) + !....allocate sparse matrix structure for psblas routines..... + use psb_descriptor_type + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + + implicit none + + !....parameters... + integer, intent(in) :: m + real(kind(1.d0)), pointer :: x(:) + type(psb_desc_type), intent(in):: desc_a + integer :: info + + !locals + integer :: nprow,npcol,myrow,mycol,err,n_col,n_row,dectype,i,err_act + integer :: icontxt + integer :: int_err(5),temp(1),exch(2) + real(kind(1.d0)) :: real_err(5) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + call blacs_gridinfo(icontxt, nprow, npcol, myrow, mycol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + dectype=desc_a%matrix_data(psb_dec_type_) + if (debug) write(0,*) 'dall: dectype',dectype + if (debug) write(0,*) 'dall: is_ok? dectype',psb_is_ok_dec(dectype) + !... check m and n parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.psb_is_ok_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + else if (m.ne.desc_a%matrix_data(psb_n_)) then + info = 300 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = psb_n_ + int_err(5) = desc_a%matrix_data(psb_n_) + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !global check on m and n parameters + if (myrow.eq.root) then + exch(1) = m + call igebs2d(icontxt,psb_all_,psb_topdef_, ione,ione, exch, ione) + else + call igebr2d(icontxt,psb_all_,psb_topdef_, ione,ione, exch, ione, root, 0) + if (exch(1) .ne. m) then + info = 550 + int_err(1) = 1 + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + !....allocate x ..... + if (psb_is_asb_dec(dectype).or.psb_is_upd_dec(dectype)) then + n_col = max(1,desc_a%matrix_data(psb_n_col_)) + call psb_realloc(n_col,x,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + do i=1,n_col + x(i) = 0.0d0 + end do + + else if (psb_is_bld_dec(dectype)) then + n_row = max(1,desc_a%matrix_data(psb_n_row_)) + call psb_realloc(n_row,x,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + do i=1,n_row + x(i) = 0.0d0 + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dallocv + diff --git a/src/tools/psb_dasb.f90 b/src/tools/psb_dasb.f90 new file mode 100644 index 00000000..6aa6c69a --- /dev/null +++ b/src/tools/psb_dasb.f90 @@ -0,0 +1,217 @@ +! File: psb_dasb.f90 +! +! Subroutine: psb_dasb +! Assembles a dense matrix for PSBLAS routines +! +! Parameters: +! x - real,pointer(dim=2). The matrix to be assembled. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dasb(x, desc_a, info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), pointer :: x(:,:) + integer, intent(out) :: info + + ! local variables + integer :: err, icontxt,nprow,npcol,me,mypcol,temp,lwork,nrow,ncol, err_act + real(kind(1.d0)),pointer :: dtemp(:,:) + integer :: int_err(5), i1sz, i2sz, dectype, i,j + double precision :: real_err(5) + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + name='psb_dasb' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + + if ((.not.associated(desc_a%matrix_data))) then + info=3110 + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) write(*,*) 'asb start: ',nprow,npcol,me,& + &desc_a%matrix_data(psb_dec_type_) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + else if (.not.psb_is_asb_dec(dectype)) then + if (debug) write(*,*) 'asb error ',& + &dectype + info = 3110 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + ! check size + icontxt=desc_a%matrix_data(psb_ctxt_) + nrow=desc_a%matrix_data(psb_n_row_) + ncol=desc_a%matrix_data(psb_n_col_) + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + if (debug) write(*,*) 'asb: ',i1sz,i2sz,nrow,ncol + if (i1sz.lt.ncol) then + allocate(dtemp(ncol,i2sz),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=ncol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + do j=1,size(x,2) + do i=1,nrow + dtemp(i,j) = x(i,j) + end do + end do + + deallocate(x) + x => dtemp + endif + + ! ..update halo elements.. + call psb_halo(x,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='psb_halo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dasb + + + +! Subroutine: psb_dasb +! Assembles a dense matrix for PSBLAS routines +! +! Parameters: +! x - real,pointer(dim=1). The matrix to be assembled. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dasbv(x, desc_a, info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), pointer :: x(:) + integer, intent(out) :: info + + ! local variables + integer :: err, icontxt,nprow,npcol,me,mypcol,temp,lwork + integer :: int_err(5), i1sz,nrow,ncol, dectype, i, err_act + real(kind(1.d0)),pointer :: dtemp(:) + double precision :: real_err(5) + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + character(len=20) :: name,ch_err + + info = 0 + int_err(1) = 0 + name = 'psb_dasbv' + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + else if (.not.psb_is_asb_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + nrow=desc_a%matrix_data(psb_n_row_) + ncol=desc_a%matrix_data(psb_n_col_) + if (debug) write(*,*) name,' sizes: ',nrow,ncol + i1sz = size(x) + if (debug) write(*,*) 'dasb: sizes ',i1sz,ncol + if (i1sz.lt.ncol) then + allocate(dtemp(ncol),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=ncol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + do i=1,nrow + dtemp(i) = x(i) + end do + deallocate(x) + x => dtemp + endif + + ! ..update halo elements.. + call psb_halo(x,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_pshalo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dasbv + diff --git a/src/tools/psb_dcsrovr.f90 b/src/tools/psb_dcsrovr.f90 new file mode 100644 index 00000000..4260a9f9 --- /dev/null +++ b/src/tools/psb_dcsrovr.f90 @@ -0,0 +1,322 @@ +! File: psb_dcsrovr.f90 +! +!***************************************************************************** +!* * +!* This routine does the retrieval of remote matrix rows. * +!* Note that retrieval is done through GTROW, therefore it should work * +!* for any format. * +!* Currently the output is BLK%FIDA='CSR' but it would take little * +!* work to change that; the pieces are transferred in COO format * +!* thus we would only need a DCSDP at the end to exit in whatever format * +!* is needed. * +!* But I'm feeling soooooo lazy today...... * +!* * +!* * +!* * +!* * +!***************************************************************************** +Subroutine psb_dcsrovr(a,desc_a,blk,info,rwcnv,clcnv,outfmt) + + use psb_serial_mod + use psb_descriptor_type + Use psb_prec_type + use psb_realloc_mod + use psb_tools_mod, only : psb_glob_to_loc, psb_loc_to_glob + use psb_error_mod + + Implicit None + + include 'mpif.h' + Type(psb_dspmat_type),Intent(in) :: a + Type(psb_dspmat_type),Intent(inout) :: blk + Type(psb_desc_type),Intent(in) :: desc_a + integer, intent(out) :: info + logical, optional, intent(in) :: rwcnv,clcnv + character(len=5), optional :: outfmt + !c ...local scalars.... + Integer :: nprow,npcol,me,mycol,counter,proc,n,i,& + & n_el_send,k,n_el_recv,icontxt, idx, r, tot_elem,& + & n_elem, m, j, ipx,mat_recv, iszs, iszr,& + & idxs,idxr, nrv, nsd,nz + Type(psb_dspmat_type) :: tmp + Integer :: l1,ircode, icomm, err_act + Integer,Pointer :: wrk(:), sdid(:,:), brvindx(:),rvid(:,:), & + & rvsz(:), bsdindx(:),sdsz(:) + logical :: rwcnv_,clcnv_ + character(len=5) :: outfmt_ + Logical,Parameter :: debug=.false., usea2av=.true. + real(kind(1.d0)) :: t1,t2,t3,t4,t5,t6,t7,t8,t9 + character(len=20) :: name, ch_err + + info=0 + name='psb_dcsrovr' + call psb_erractionsave(err_act) + + if(debug) write(0,*)'Inside DCSROVR' + if (present(rwcnv)) then + rwcnv_ = rwcnv + else + rwcnv_ = .true. + endif + if (present(clcnv)) then + clcnv_ = clcnv + else + clcnv_ = .true. + endif + + if (present(outfmt)) then + call touppers(outfmt,outfmt_) + else + outfmt_ = 'CSR' + endif + + icontxt=desc_a%matrix_data(psb_ctxt_) + Call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + t1 = mpi_wtime() + Allocate(sdid(nprow,3),rvid(nprow,3),brvindx(nprow+1),& + & rvsz(nprow),sdsz(nprow),bsdindx(nprow+1),stat=info) + + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + If (debug) Write(0,*)'dcsrovr',me + + + l1 = 0 + + sdsz(:)=0 + rvsz(:)=0 + ipx = 1 + brvindx(ipx) = 0 + bsdindx(ipx) = 0 + counter=1 + idx = 0 + idxs = 0 + idxr = 0 + blk%k = a%k + blk%m = 0 + ! For all rows in the halo descriptor, extract and send/receive. + Do + proc=desc_a%halo_index(counter) + if (proc == -1) exit + n_el_recv = desc_a%halo_index(counter+psb_n_elem_recv_) + counter = counter+n_el_recv + n_el_send = desc_a%halo_index(counter+psb_n_elem_send_) + tot_elem = 0 + Do j=0,n_el_send-1 + idx = desc_a%halo_index(counter+psb_elem_send_+j) + call psb_spinfo(psb_nzrowreq_,a,n_elem,info,iaux=idx) + if (info /= 0) then + info=4010 + ch_err='psb_spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + tot_elem = tot_elem+n_elem + Enddo + sdsz(proc+1) = tot_elem + + blk%m = blk%m + n_el_recv + + counter = counter+n_el_send+3 + Enddo + call blacs_get(icontxt,10,icomm) + + call mpi_alltoall(sdsz,1,mpi_integer,rvsz,1,mpi_integer,icomm,info) + if (info /= 0) then + info=4010 + ch_err='mpi_alltoall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + idxs = 0 + idxr = 0 + counter = 1 + Do + proc=desc_a%halo_index(counter) + if (proc == -1) exit + n_el_recv = desc_a%halo_index(counter+psb_n_elem_recv_) + counter = counter+n_el_recv + n_el_send = desc_a%halo_index(counter+psb_n_elem_send_) + + bsdindx(proc+1) = idxs + idxs = idxs + sdsz(proc+1) + brvindx(proc+1) = idxr + idxr = idxr + rvsz(proc+1) + counter = counter+n_el_send+3 + Enddo + + iszr=sum(rvsz) + call psb_spreall(blk,max(iszr,1),info) + if(debug) write(0,*)me,'CSROVR Sizes:',size(blk%ia1),size(blk%ia2) + if (info /= 0) then + info=4010 + ch_err='psb_spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + mat_recv = iszr + iszs=sum(sdsz) + call psb_spall(0,0,tmp,max(iszs,1),info) + tmp%fida='COO' + + t2 = mpi_wtime() + + l1 = 0 + ipx = 1 + counter=1 + idx = 0 + call psb_spreinit(tmp) + Do + proc=desc_a%halo_index(counter) + if (proc == -1) exit + n_el_recv=desc_a%halo_index(counter+psb_n_elem_recv_) + counter=counter+n_el_recv + n_el_send=desc_a%halo_index(counter+psb_n_elem_send_) + tot_elem=0 + + Do j=0,n_el_send-1 + idx = desc_a%halo_index(counter+psb_elem_send_+j) + call psb_spinfo(psb_nzrowreq_,a,n_elem,info,iaux=idx) + if (info /= 0) then + info=4010 + ch_err='spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +!!$ write(0,*) me,'Getting row ',idx,n_elem + call psb_spgtrow(idx,a,tmp,info,append=.true.) + if (info /= 0) then + info=4010 + ch_err='psb_spgtrow' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tot_elem=tot_elem+n_elem + Enddo + + ipx = ipx + 1 + + counter = counter+n_el_send+3 + Enddo + nz = tmp%infoa(psb_nnz_) +!!$ call csprt(20+me,tmp,head='% CSROVR border SEND .') +!!$ close(20+me) + + if (rwcnv_) call psb_loc_to_glob(tmp%ia1(1:nz),desc_a,info,iact='I') + if (clcnv_) call psb_loc_to_glob(tmp%ia2(1:nz),desc_a,info,iact='I') + if (info /= 0) then + info=4010 + ch_err='psb_loc_to_glob' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +!!$ call csprt(30+me,tmp,head='% CSROVR border SEND .') +!!$ close(30+me) + + + call mpi_alltoallv(tmp%aspk,sdsz,bsdindx,mpi_double_precision,& + & blk%aspk,rvsz,brvindx,mpi_double_precision,icomm,info) + call mpi_alltoallv(tmp%ia1,sdsz,bsdindx,mpi_integer,& + & blk%ia1,rvsz,brvindx,mpi_integer,icomm,info) + call mpi_alltoallv(tmp%ia2,sdsz,bsdindx,mpi_integer,& + & blk%ia2,rvsz,brvindx,mpi_integer,icomm,info) + if (info /= 0) then + info=4010 + ch_err='mpi_alltoallv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + t3 = mpi_wtime() + + + ! + ! Convert into local numbering + ! + if (rwcnv_) call psb_glob_to_loc(blk%ia1(1:iszr),desc_a,info,iact='I') + if (clcnv_) call psb_glob_to_loc(blk%ia2(1:iszr),desc_a,info,iact='I') + if (info /= 0) then + info=4010 + ch_err='psbglob_to_loc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l1 = 0 + Do i=1,iszr +!!$ write(0,*) work5(i),work6(i) + r=(blk%ia1(i)) + k=(blk%ia2(i)) + If (k.Gt.0) Then + l1=l1+1 + blk%aspk(l1) = blk%aspk(i) + blk%ia1(l1) = r + blk%ia2(l1) = k + blk%k = max(blk%k,k) + End If + Enddo + blk%fida='COO' + blk%infoa(psb_nnz_)=l1 +!!$ open(50+me) +!!$ call csprt(50+me,blk,head='% CSROVR border .') +!!$ close(50+me) + t4 = mpi_wtime() + + if(debug) Write(0,*)me,'End first loop',counter,l1,blk%m + + ! + ! Combined sort & conversion to CSR. + ! + if(debug) write(0,*) me,'Calling ipcoo2csr from dcsrovr ',blk%m,blk%k,l1,blk%ia2(2) + + select case(outfmt_) + case ('CSR') + call psb_ipcoo2csr(blk,info,rwshr=.true.) + if (info /= 0) then + info=4010 + ch_err='psb_ipcoo2csr' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + case('COO') + ! Do nothing! + case default + write(0,*) 'Error in DCSROVR : invalid outfmt "',outfmt_,'"' + end select + t5 = mpi_wtime() + + + +!!$ write(0,'(i3,1x,a,4(1x,i14))') me,'DCSROVR sizes:',iszr,iszs +!!$ write(0,'(i3,1x,a,4(1x,g14.5))') me,'DCSROVR timings:',t6-t2,t7-t6,t8-t7,t3-t8 +!!$ write(0,'(i3,1x,a,4(1x,g14.5))') me,'DCSROVR timings:',t2-t1,t3-t2,t4-t3,t5-t4 + + Deallocate(sdid,brvindx,rvid,bsdindx,rvsz,sdsz,stat=info) + + call psb_spfree(tmp,info) + if (info /= 0) then + info=4010 + ch_err='psb_spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +End Subroutine psb_dcsrovr diff --git a/src/tools/psb_dcsrp.f90 b/src/tools/psb_dcsrp.f90 new file mode 100644 index 00000000..0a806b47 --- /dev/null +++ b/src/tools/psb_dcsrp.f90 @@ -0,0 +1,169 @@ +! File: psb_dcsrp.f90 +! +! Subroutine: psb_dcsrp +! Apply a right permutation to a sparse matrix, i.e. permute the column +! indices. +! +! Parameters: +! trans - character. Whether iperm or its transpose should be applied +! iperm - integer, pointer, dimension(:). A permutation vector; its size must be either N_ROW or N_COL +! a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dcsrp(trans,iperm,a, desc_a, info) + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + ! implicit none + + interface dcsrp + + subroutine dcsrp(trans,m,n,fida,descra,ia1,ia2,& + & infoa,p,work,lwork,ierror) + integer, intent(in) :: m, n, lwork + integer, intent(out) :: ierror + character, intent(in) :: trans + double precision, intent(inout) :: work(*) + integer, intent(in) :: p(*) + integer, intent(inout) :: ia1(*), ia2(*), infoa(*) + character, intent(in) :: fida*5, descra*11 + end subroutine dcsrp + end interface + + + interface isaperm + + logical function isaperm(n,ip) + integer, intent(in) :: n + integer, intent(inout) :: ip(*) + end function isaperm + end interface + + !...parameters.... + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(inout) :: iperm(:), info + character, intent(in) :: trans + !....locals.... + integer :: int_err(5),p(1),infoa(10) + real(kind(1.d0)) :: real_err(5) + integer,pointer :: ipt(:) + integer :: i,err,nprow,npcol,me,& + & mypcol ,ierror ,n_col,l_dcsdp, iout, ipsize + integer :: dectype + real(kind(1.d0)), pointer :: work_dcsdp(:) + integer :: icontxt,temp(1),n_row,err_act + character(len=20) :: name, char_err + + real(kind(1.d0)) :: time(10), mpi_wtime + external mpi_wtime + logical, parameter :: debug=.false. + + time(1) = mpi_wtime() + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + info=0 + call psb_erractionsave(err_act) + name = 'psd_csrp' + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + + if (.not.is_asb_dec(dectype)) then + info = 600 + int_err(1) = dectype + call psb_errpush(info,name,int_err) + goto 9999 + endif + + ipsize = size(iperm) + if (.not.((ipsize.eq.n_col).or.(ipsize.eq.n_row) )) then + info = 35 + int_err(1) = 1 + int_err(2) = ipsize + call psb_errpush(info,name,int_err) + goto 9999 + else + if (.not.isaperm(ipsize,iperm)) then + info = 70 + int_err(1) = 1 + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + l_dcsdp = (n_col) + + call psb_realloc(l_dcsdp,work_dcsdp,info) + call psb_realloc(n_col,ipt,info) + if(info /= no_err) then + info=4010 + char_err='psrealloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + end if + + if (ipsize.eq.n_col) then + do i=1, n_col + ipt(i) = iperm(i) + enddo + else + do i=1, n_row + ipt(i) = iperm(i) + enddo + do i=n_row+1,n_col + ipt(i) = i + enddo + endif + ! crossed fingers..... + ! fix glob_to_loc/loc_to_glob mappings, then indices lists + ! hmm, maybe we should just move all of this onto a different level, + ! have a specialized subroutine, and do it in the solver context???? + if (debug) write(0,*) 'spasb: calling dcsrp',size(work_dcsdp) + call dcsrp(trans,n_row,n_col,a%fida,a%descra,a%ia1,a%ia2,a%infoa,& + & ipt,work_dcsdp,size(work_dcsdp),info) + if(info /= no_err) then + info=4010 + char_err='dcsrp' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + end if + + deallocate(ipt,work_dcsdp) + + time(4) = mpi_wtime() + time(4) = time(4) - time(3) + if (debug) then + call dgamx2d(icontxt, all, topdef, ione, ione, time(4),& + & ione,temp ,temp,-ione ,-ione,-ione) + + write (*, *) ' comm structs assembly: ', time(4)*1.d-3 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error() + return + end if + return + +end subroutine psb_dcsrp diff --git a/src/tools/psb_descasb.f90 b/src/tools/psb_descasb.f90 new file mode 100644 index 00000000..46b06128 --- /dev/null +++ b/src/tools/psb_descasb.f90 @@ -0,0 +1,664 @@ +! File: psb_descasb.f90 +! +! Subroutine: psb_descasb +! This routine takes a matrix A with its descriptor, and builds the +! auxiliary descriptor corresponding to the number of overlap levels +! specified on input. This is the actual worker horse..... +! Note that n_ovr > 0 thanks to the caller routine. +! +! Parameters: +! n_ovr - integer. The number of overlap levels +! desc_p - type(). The communication descriptor for the preconditioner. +! desc_a - type(). The communication descriptor. +! a - type( desc_a%halo_index + + Allocate(tmp_ovr_idx(l_tmp_ovr_idx),tmp_halo(l_tmp_halo)) + desc_p%ovrlap_elem(:) = -1 + tmp_ovr_idx(:) = -1 + tmp_halo(:) = -1 + counter_e = 1 + tot_recv = 0 + counter_h = 1 + counter_o = 1 +!!$ write(0,*) 'Before ',tmp_ovr_idx(1:10) +!!$ ierr = MPE_Log_event( idscb, 0, "st DSCLP " ) + + ! See comment in main loop below. + call InitPairSearchTree(info) + if (info.ne.0) then + info=4010 + ch_err='InitPairSearhTree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) myrow,'Done InitPairSearchTree',info + + ! + ! A picture is in order to understand what goes on here. + ! I is the internal part; H is halo, R row, C column. The final + ! matrix with N levels of overlap looks like this + ! + ! I | Hc1 | 0 | 0 | + ! -------|-----|-----|-----| + ! Hr1 | Hd1 | Hc2 | 0 | + ! -------|-----|-----|-----| + ! 0 | Hr2 | Hd2 | Hc2 | + ! -------|-----|-----|-----| + ! 0 | 0 | Hr3 | Hd3 | Hc3 + ! + ! At the start we already have I and Hc1, so we know the row + ! indices that will make up Hr1, and also who owns them. As we + ! actually get those rows, we receive the column indices in Hc2; + ! these define the row indices for Hr2, and so on. When we have + ! reached the desired level HrN, we may ignore HcN. + ! + ! + Do i_ovr=1,n_ovr + + if (debug) write(0,*) myrow,'Running on overlap level ',i_ovr,' of ',n_ovr +!!$ t_halo_in(:) = -1 + + ! + ! At this point, halo contains a valid halo corresponding to the + ! matrix enlarged with the elements in the frontier for I_OVR-1. + ! At the start, this is just the halo for A; the rows for indices in + ! the first halo will contain column indices defining the second halo + ! level and so on. + ! + bsdindx(:) = 0 + sdsz(:) = 0 + brvindx(:) = 0 + rvsz(:) = 0 + idxr = 0 + idxs = 0 + counter = 1 + counter_t = 1 + + t1 = mpi_wtime() + Do While (halo(counter) /= -1) + tot_elem=0 + proc=halo(counter+psb_proc_id_) + n_elem_recv=halo(counter+psb_n_elem_recv_) + n_elem_send=halo(counter+n_elem_recv+psb_n_elem_send_) + If ((counter+n_elem_recv+n_elem_send) > Size(halo)) then + info=-1 + call psb_errpush(info,name) + goto 9999 + end If + tot_recv=tot_recv+n_elem_recv + if (debug) write(0,*) myrow,' DESCASB tot_recv:',proc,n_elem_recv,tot_recv + ! + ! While running through the column indices exchanged with other procs + ! we have to keep track of which elements actually are overlap and halo + ! ones to record them in overlap_elem. We do this by maintaining + ! an AVL balanced search tree: at each point counter_e is the next + ! free index element. The search routine for gidx will return + ! glx if gidx was already assigned a local index (glxSize(halo)) then + info=-2 + call psb_errpush(info,name) + goto 9999 + end If + idx = halo(counter+psb_elem_recv_+j) + idx = halo(counter+psb_elem_recv_+j) + If(idx > Size(desc_p%loc_to_glob)) then + info=-3 + call psb_errpush(info,name) + goto 9999 + endif + + gidx = desc_p%loc_to_glob(idx) + + If((counter_o+2) > Size(tmp_ovr_idx)) Then + isz = max((3*Size(tmp_ovr_idx))/2,(counter_o+3)) + if (debug) write(0,*) myrow,'Realloc tmp_ovr',isz + call psb_realloc(isz,tmp_ovr_idx,info,pad=-1) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If + + tmp_ovr_idx(counter_o)=proc + tmp_ovr_idx(counter_o+1)=1 + tmp_ovr_idx(counter_o+2)=gidx + tmp_ovr_idx(counter_o+3)=-1 + counter_o=counter_o+3 + + If((counter_h+2) > Size(tmp_halo)) Then + isz = max((3*Size(tmp_halo))/2,(counter_h+3)) + if (debug) write(0,*) myrow,'Realloc tmp_halo',isz + call psb_realloc(isz,tmp_halo,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If + + tmp_halo(counter_h)=proc + tmp_halo(counter_h+1)=1 + tmp_halo(counter_h+2)=idx + tmp_halo(counter_h+3)=-1 + + counter_h=counter_h+3 + + call SearchInsKeyVal(gidx,counter_e,glx,info) +!!$ if (debug) write(0,*) 'From searchInsKey ',gidx,glx,counter_e,info + if (info>=0) then + If (glx < counter_e) Then + desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)= & + & desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)+1 + Else + If((counter_e+2) > Size(desc_p%ovrlap_elem)) Then + isz = max((3*Size(desc_p%ovrlap_elem))/2,(counter_e+3)) + if (debug) write(0,*) myrow,'Realloc ovr_El',isz + call psb_realloc(isz,desc_p%ovrlap_elem,info) + if (info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If +!!$ if (debug) write(0,*) 'Adding into ovrlap ',gidx,glx,counter_e,info + + desc_p%ovrlap_elem(counter_e)=gidx + desc_p%ovrlap_elem(counter_e+psb_n_dom_ovr_)=2 + desc_p%ovrlap_elem(counter_e+2)=-1 + counter_e = counter_e + 2 + End If + else + write(0,*) myrow, 'Descasb From SearchInsKeyVal: ',info + endif + Enddo + if (debug) write(0,*) myrow,'Checktmp_o_i Loop Mid1',tmp_ovr_idx(1:10) + counter = counter+n_elem_recv + + ! + ! add send elements in halo_index into ovrlap_index + ! + Do j=0,n_elem_send-1 + + idx = halo(counter+psb_elem_send_+j) + gidx = desc_p%loc_to_glob(idx) + + If((counter_o+2) > Size(tmp_ovr_idx)) Then + isz = max((3*Size(tmp_ovr_idx))/2,(counter_o+3)) + if (debug) write(0,*) myrow,'Realloc tmp_ovr',isz + call psb_realloc(isz,tmp_ovr_idx,info) + if (info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If + + tmp_ovr_idx(counter_o)=proc + tmp_ovr_idx(counter_o+1)=1 + tmp_ovr_idx(counter_o+2)=gidx + tmp_ovr_idx(counter_o+3)=-1 + counter_o=counter_o+3 + + call SearchInsKeyVal(gidx,counter_e,glx,info) +!!$ if (debug) write(0,*) 'From searchInsKey ',gidx,glx,counter_e,info + if (info>=0) then + If (glx < counter_e) Then + desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)= & + & desc_p%ovrlap_elem(glx+psb_n_dom_ovr_)+1 + Else + If((counter_e+2) > Size(desc_p%ovrlap_elem)) Then + isz = max((3*Size(desc_p%ovrlap_elem))/2,(counter_e+3)) + if (debug) write(0,*) myrow,'Realloc ovr_el',isz + call psb_realloc(isz,desc_p%ovrlap_elem,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If +!!$ if (debug) write(0,*) 'Adding into ovrlap ',gidx,glx,counter_e,info + desc_p%ovrlap_elem(counter_e)=gidx + desc_p%ovrlap_elem(counter_e+psb_n_dom_ovr_)=2 + desc_p%ovrlap_elem(counter_e+2)=-1 + counter_e = counter_e + 2 + End If + else + write(0,*) myrow,'Descasb From SearchInsKeyVal: ',info + endif + + ! + ! Prepare to exchange the halo rows with the other proc. + ! + If (i_ovr < (n_ovr)) Then + call psb_spinfo(psb_nzrowreq_,a,n_elem,info,iaux=idx) + if (info.ne.0) then + info=4010 + ch_err='psb_spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + If((idxs+tot_elem+n_elem) > lworks) Then + isz = max((3*lworks)/2,(idxs+tot_elem+n_elem)) + if (debug) write(0,*) myrow,'Realloc works',isz + call psb_realloc(isz,works,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + lworks = isz + End If + If((n_elem) > size(blk%ia2)) Then + isz = max((3*size(blk%ia2))/2,(n_elem)) + if (debug) write(0,*) myrow,'Realloc blk',isz + call psb_spreall(blk,isz,info) + if (info.ne.0) then + info=4010 + ch_err='psb_spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If + + call psb_spgtrow(idx,a,blk,info) + if (info.ne.0) then + info=4010 + ch_err='psb_spgtrow' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + Do jj=1,n_elem + works(idxs+tot_elem+jj)=desc_p%loc_to_glob(blk%ia2(jj)) + End Do + tot_elem=tot_elem+n_elem + End If + + Enddo + + t4 = t4 + mpi_wtime() -t3 + + if (i_ovr < n_ovr) then + if (tot_elem > 1) then +!!$ write(0,*) me,'Realloc temp',tot_elem+2 + if (tot_elem+2 > size(temp)) then +!!$ write(0,*) me,'Realloc temp',tot_elem+2 + deallocate(temp) + allocate(temp(tot_elem+2),stat=info) + endif + Call mrgsrt(tot_elem,works(idxs+1),temp,info) + If (info.Eq.0) Call ireordv1(tot_elem,works(idxs+1),temp) + lx = works(idxs+1) + i = 1 + j = 1 + do + j = j + 1 + if (j > tot_elem) exit + if (works(idxs+j) /= lx) then + i = i + 1 + works(idxs+i) = works(idxs+j) + lx = works(idxs+i) + end if + end do + tot_elem = i + endif + if (debug) write(0,*) myrow,'Checktmp_o_i Loop Mid2',tmp_ovr_idx(1:10) + sdsz(proc+1) = tot_elem + idxs = idxs + tot_elem + end if + counter = counter+n_elem_send+3 + if (debug) write(0,*) myrow,'Checktmp_o_i Loop End',tmp_ovr_idx(1:10) + Enddo + if (debug) write(0,*)myrow,'End phase 1 DESCASB', m, n_col, tot_recv + + if (i_ovr < n_ovr) then + ! + ! Exchange data requests with everybody else: so far we have + ! accumulated RECV requests, we have an all-to-all to build + ! matchings SENDs. + ! + call mpi_alltoall(sdsz,1,mpi_integer,rvsz,1,mpi_integer,icomm,info) + if (info.ne.0) then + info=4010 + ch_err='mpi_alltoall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + idxs = 0 + idxr = 0 + counter = 1 + Do + proc=halo(counter) + if (proc == -1) exit + n_elem_recv = halo(counter+psb_n_elem_recv_) + counter = counter+n_elem_recv + n_elem_send = halo(counter+psb_n_elem_send_) + + bsdindx(proc+1) = idxs + idxs = idxs + sdsz(proc+1) + brvindx(proc+1) = idxr + idxr = idxr + rvsz(proc+1) + counter = counter+n_elem_send+3 + Enddo + + iszr=sum(rvsz) + if (max(iszr,1) > lworkr) then + call psb_realloc(max(iszr,1),workr,info) + if (info.ne.0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + lworkr=max(iszr,1) + end if + + call mpi_alltoallv(works,sdsz,bsdindx,mpi_integer,& + & workr,rvsz,brvindx,mpi_integer,icomm,info) + if (info.ne.0) then + info=4010 + ch_err='mpi_alltoallv' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + Do i=1,iszr + idx=workr(i) + if (idx <1) then + write(0,*) myrow,'Error in DESCASB ',idx,i,iszr +!!$ write(0,*) myrow, ' WORKR :',workr(1:iszr) + else If (desc_p%glob_to_loc(idx) < -np) Then + ! + ! This is a new index. Assigning a local index as + ! we receive the guarantees that all indices for HALO(I) + ! will be less than those for HALO(J) whenever I Size(desc_p%loc_to_glob)) Then + isz = 3*n_col/2 + if (debug) write(0,*) myrow,'Realloc loc_to_glob' + call psb_realloc(isz,desc_p%loc_to_glob,info) + if (info.ne.0) then + info=4010 + ch_err='psrealloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + End If + desc_p%glob_to_loc(idx)=n_col + desc_p%loc_to_glob(n_col)=idx + If((counter_t+3) > Size(t_halo_in))Write(0,*)'bingo' + t_halo_in(counter_t)=proc_id + t_halo_in(counter_t+1)=1 + t_halo_in(counter_t+2)=n_col + counter_t=counter_t+3 + if (debug) write(0,*) myrow,' DESCASB: Added into t_halo_in from recv',& + &proc_id,n_col,idx + else if (desc_p%glob_to_loc(idx) < 0) Then + if (debug) write(0,*) myrow,'Wrong input to descasb??',& + &idx,desc_p%glob_to_loc(idx) + End If + End Do + end if + t2 = mpi_wtime() + n_row=m+tot_recv + desc_p%matrix_data(psb_n_row_)=n_row + desc_p%matrix_data(psb_n_col_)=n_col + + ! + ! Ok, now we have a temporary halo with all the info for the + ! next round. If we need to keep going, convert the halo format + ! from temporary to final, so that we can work out the next iteration. + ! This uses one of the convert_comm internals, i.e. we are doing + ! the equivalent of a partial call to convert_comm + ! + + If (i_ovr < (n_ovr)) Then + + If(lwork < (counter_t/3+np*3)) Then + isz = max((3*lwork)/2,(counter_t/3+np*3)) + if (debug) write(0,*) myrow,'Realloc work',isz + deallocate(work) + allocate(work(isz),stat=info) + lwork=size(work) + Endif + t_halo_in(counter_t)=-1 +!!$ +!!$ t_halo_out(:)=-1 +!!$ ierr = MPE_Log_event( icrhb, 0, "st CRTHAL" ) + if (debug) write(0,*) myrow,'Checktmp_o_i 1',tmp_ovr_idx(1:10) + if (debug) write(0,*) myrow,'Calling Crea_Halo' + call psi_crea_index(desc_p,t_halo_in,t_halo_out,.false.,info) +!!$ Call psi_crea_halo(desc_p%matrix_data,t_halo_in,& +!!$ & np,t_halo_out,Size(t_halo_out),dep_list,& +!!$ & dl_lda,length_dl,desc_p%loc_to_glob,& +!!$ & desc_p%glob_to_loc,work,lwork,info) + if (debug) then + write(0,*) myrow,'Done Crea_Halo' + call blacs_barrier(icontxt,'All') + end if + if (debug) write(0,*) myrow,'Checktmp_o_i 2',tmp_ovr_idx(1:10) + if (debug) write(0,*) myrow,'Done Crea_Halo' +!!$ ierr = MPE_Log_event( icrhe, 0, "ed CRHAL " ) + + halo => t_halo_out + ! + ! At this point we have built the halo necessary for I_OVR+1. + ! + End If + if (debug) write(0,*) myrow,'Checktmp_o_i ',tmp_ovr_idx(1:10) + t3 = mpi_wtime() + tl = tl +(t2-t1) + tch = tch +(t3-t2) + End Do + t1 = mpi_wtime() + call FreePairSearchTree() +!!$ ierr = MPE_Log_event( idsce, 0, "st DSCASB" ) + desc_p%matrix_data(psb_m_)=desc_a%matrix_data(psb_m_) + desc_p%matrix_data(psb_n_)=desc_a%matrix_data(psb_n_) + + tmp_halo(counter_h)=-1 + tmp_ovr_idx(counter_o)=-1 +!!$ ierr = MPE_Log_event( iovrb, 0, "st CNVCRT" ) + + ! + ! At this point we have gathered all the indices in the halo at + ! N levels of overlap. Just call convert_comm. This is + ! the same routine as gets called inside SPASB. + ! + + if (debug) then + write(0,*) 'psb_dscasb: converting indexes' + call blacs_barrier(icontxt,'All') + end if + !.... convert comunication stuctures.... + ! first the halo index + call psi_crea_index(desc_p,tmp_halo,& + & desc_p%halo_index,.false.,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_index') + goto 9999 + end if + + ! then the overlap index + call psi_crea_index(desc_p,tmp_ovr_idx,& + & desc_p%ovrlap_index,.true.,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_index') + goto 9999 + end if + + ! next is the ovrlap_elem index + call psi_crea_ovr_elem(desc_p%ovrlap_index,desc_p%ovrlap_elem) + + ! finally bnd_elem + call psi_crea_bnd_elem(desc_p,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_bnd_elem') + goto 9999 + end if + + ! Ok, register into MATRIX_DATA & free temporary work areas + desc_a%matrix_data(psb_dec_type_) = psb_desc_asb_ + + allocate(desc_p%lprm(1)) + desc_p%lprm(1) = 0 + + if (debug) then + write(0,*) myrow,'Done Convert_comm' + call blacs_barrier(icontxt,'All') + end if + + if (.false.) then + call descprt(70+myrow,desc_p,.false.) + end if + + if (debug) write(0,*) myrow,'Done ConvertComm' +!!$ ierr = MPE_Log_event( iovre, 0, "ed CNVCRT" ) + Deallocate(works,workr,t_halo_in,t_halo_out,work,& + & length_dl,dep_list,tmp_ovr_idx,tmp_halo,& + & brvindx,rvsz,sdsz,bsdindx,temp,stat=info) + call psb_spfree(blk,info) + if (info.ne.0) then + info=4010 + ch_err='spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + t2 = mpi_wtime() + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +End Subroutine psb_descasb diff --git a/src/tools/psb_descprt.f90 b/src/tools/psb_descprt.f90 new file mode 100644 index 00000000..1c81f249 --- /dev/null +++ b/src/tools/psb_descprt.f90 @@ -0,0 +1,176 @@ +! File: psb_descprt.f90 +! +! Subroutine: psb_descprt +! Prints the descriptor to an output file +! +! Parameters: +! iout - integer. The output unit to print to. +! desc_p - type(). The communication descriptor to be printed. +! glob - logical(otpional). Wheter to print out global or local data. +! short - logical(optional). Used to choose a verbose output. +subroutine psb_descprt(iout,desc_p,glob,short) + use psb_const_mod + use psb_descriptor_type + implicit none + type(psb_desc_type), intent(in) :: desc_p + integer, intent(in) :: iout + logical, intent(in), optional :: glob,short + logical :: lshort, lglob + + integer :: m, n_row, n_col,counter,idx,n_elem_recv,n_elem_send,& + & proc,i + + if (present(glob)) then + lglob = glob + else + lglob = .false. + endif + if (present(short)) then + lshort = short + else + lshort = .true. + endif + + if (.not.lglob) then + write(iout,*) 'Precond. descriptor:',desc_p%matrix_data(1:10) + m=desc_p%matrix_data(psb_m_) + n_row=desc_p%matrix_data(psb_n_row_) + n_col=desc_p%matrix_data(psb_n_col_) + if (.not.lshort) & + & write(iout,*) 'Loc_to_glob ',desc_p%loc_to_glob(1:n_row), ': ',& + & desc_p%loc_to_glob(n_row+1:n_col) + + if (.not.lshort) write(iout,*) 'glob_to_loc ',desc_p%glob_to_loc(1:m) + write(iout,*) 'Halo_index' + counter = 1 + Do + proc=desc_p%halo_index(counter+psb_proc_id_) + if (proc == -1) exit + n_elem_recv=desc_p%halo_index(counter+psb_n_elem_recv_) + n_elem_send=desc_p%halo_index(counter+n_elem_recv+psb_n_elem_send_) + write(iout,*) 'Halo_index Receive',proc,n_elem_recv + if (.not.lshort) write(iout,*) & + & desc_p%halo_index(counter+psb_n_elem_recv_+1:counter+psb_n_elem_recv_+n_elem_recv) + write(iout,*) 'Halo_index Send',proc,n_elem_send + if (.not.lshort) write(iout,*) & + & desc_p%halo_index(counter+n_elem_recv+psb_n_elem_send_+1: & + & counter+n_elem_recv+psb_n_elem_send_+n_elem_send) + + counter = counter+n_elem_recv+n_elem_send+3 + enddo + + + write(iout,*) 'Ovrlap_index' + counter = 1 + Do + proc=desc_p%ovrlap_index(counter+psb_proc_id_) + if (proc == -1) exit + n_elem_recv=desc_p%ovrlap_index(counter+psb_n_elem_recv_) + n_elem_send=desc_p%ovrlap_index(counter+n_elem_recv+psb_n_elem_send_) + write(iout,*) 'Ovrlap_index Receive',proc,n_elem_recv + if (.not.lshort) write(iout,*) & + & desc_p%ovrlap_index(counter+psb_n_elem_recv_+1:& + & counter+psb_n_elem_recv_+n_elem_recv) + write(iout,*) 'Ovrlap_index Send',proc,n_elem_send + if (.not.lshort) write(iout,*) & + & desc_p%ovrlap_index(counter+n_elem_recv+psb_n_elem_send_+1: & + & counter+n_elem_recv+psb_n_elem_send_+n_elem_send) + + counter = counter+n_elem_recv+n_elem_send+3 + enddo + + write(iout,*) 'Ovrlap_elem' + counter = 1 + Do + idx=desc_p%ovrlap_elem(counter) + if (idx == -1) exit + n_elem_recv=desc_p%ovrlap_elem(counter+1) + if (.not.lshort) write(iout,*) idx,n_elem_Recv + counter = counter+2 + enddo + + else if (lglob) then + + write(iout,*) 'Precond. descriptor:',desc_p%matrix_data(1:10) + m=desc_p%matrix_data(psb_m_) + n_row=desc_p%matrix_data(psb_n_row_) + n_col=desc_p%matrix_data(psb_n_col_) + if (.not.lshort) then + write(iout,*) 'Loc_to_glob ' + do i=1, n_row + write(iout,*) i, desc_p%loc_to_glob(i) + enddo + write(iout,*) '........' + do i=n_row+1,n_col + write(iout,*) i, desc_p%loc_to_glob(i) + enddo + + write(iout,*) 'glob_to_loc ' + do i=1,m + write(iout,*) i,desc_p%glob_to_loc(i) + enddo + endif + write(iout,*) 'Halo_index' + counter = 1 + Do + proc=desc_p%halo_index(counter+psb_proc_id_) + if (proc == -1) exit + n_elem_recv=desc_p%halo_index(counter+psb_n_elem_recv_) + n_elem_send=desc_p%halo_index(counter+n_elem_recv+psb_n_elem_send_) + write(iout,*) 'Halo_index Receive',proc,n_elem_recv + if (.not.lshort) then + do i=counter+psb_n_elem_recv_+1,counter+psb_n_elem_recv_+n_elem_recv + write(iout,*) & + & desc_p%loc_to_glob(desc_p%halo_index(i)),desc_p%halo_index(i) + enddo + endif + write(iout,*) 'Halo_index Send',proc,n_elem_send + if (.not.lshort) then + do i=counter+n_elem_recv+psb_n_elem_send_+1, & + & counter+n_elem_recv+psb_n_elem_send_+n_elem_send + write(iout,*) & + & desc_p%loc_to_glob(desc_p%halo_index(i)), desc_p%halo_index(i) + enddo + endif + counter = counter+n_elem_recv+n_elem_send+3 + enddo + + + write(iout,*) 'Ovrlap_index' + counter = 1 + Do + proc=desc_p%ovrlap_index(counter+psb_proc_id_) + if (proc == -1) exit + n_elem_recv=desc_p%ovrlap_index(counter+psb_n_elem_recv_) + n_elem_send=desc_p%ovrlap_index(counter+n_elem_recv+psb_n_elem_send_) + write(iout,*) 'Ovrlap_index Receive',proc,n_elem_recv + if (.not.lshort) then + do i=counter+psb_n_elem_recv_+1,counter+psb_n_elem_recv_+n_elem_recv + write(iout,*) desc_p%loc_to_glob(desc_p%ovrlap_index(i)),& + & desc_p%ovrlap_index(i) + enddo + endif + write(iout,*) 'Ovrlap_index Send',proc,n_elem_send + if (.not.lshort) then + do i=counter+n_elem_recv+psb_n_elem_send_+1, & + & counter+n_elem_recv+psb_n_elem_send_+n_elem_send + write(iout,*) desc_p%loc_to_glob(desc_p%ovrlap_index(i)),& + & desc_p%ovrlap_index(i) + enddo + endif + counter = counter+n_elem_recv+n_elem_send+3 + enddo + + write(iout,*) 'Ovrlap_elem' + counter = 1 + if (.not.lshort) then + Do + idx=desc_p%ovrlap_elem(counter) + if (idx == -1) exit + n_elem_recv=desc_p%ovrlap_elem(counter+1) + write(iout,*) desc_p%loc_to_glob(idx),idx,n_elem_Recv + counter = counter+2 + enddo + endif + end if +end subroutine psb_descprt diff --git a/src/tools/psb_dfree.f90 b/src/tools/psb_dfree.f90 new file mode 100644 index 00000000..98bc88c2 --- /dev/null +++ b/src/tools/psb_dfree.f90 @@ -0,0 +1,167 @@ +! File: psb_dfree.f90 +! +! Subroutine: psb_dfree +! frees a dense matrix structure +! +! Parameters: +! x - real, pointer, dimension(:,:). The dense matrix to be freed. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dfree(x, desc_a, info) + !...free dense matrix structure... + use psb_const_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + !....parameters... + real(kind(1.d0)),pointer :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + + !...locals.... + integer :: int_err(5) + integer :: icontxt,nprow,npcol,me,mypcol,err, err_act + integer,parameter :: ione=1 + character(len=20) :: name + + + info=0 + call psb_erractionsave(err_act) + name='psb_dfree' + + icontxt=desc_a%matrix_data(psb_ctxt_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + if (.not.associated(x)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate x + deallocate(x,stat=info) + if (info.ne.no_err) then + info=4000 + call psb_errpush(info,name) + goto 9999 + else + nullify(x) + endif + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dfree + + + +! Subroutine: psb_dfreev +! frees a dense matrix structure +! +! Parameters: +! x - real, pointer, dimension(:). The dense matrix to be freed. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dfreev(x, desc_a, info) + !...free dense matrix structure... + use psb_const_mod + use psb_descriptor_type + use psb_error_mod + + implicit none + !....parameters... + real(kind(1.d0)),pointer :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer :: info + + !...locals.... + integer :: int_err(5) + integer :: icontxt,nprow,npcol,me,mypcol,err, err_act + integer,parameter :: ione=1 + character(len=20) :: name + + + info=0 + call psb_erractionsave(err_act) + name='psb_dfreev' + + icontxt=desc_a%matrix_data(psb_ctxt_) + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(x)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate x + deallocate(x,stat=info) + if (info.ne.no_err) then + info=4000 + call psb_errpush(info,name) + else + nullify(x) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dfreev diff --git a/src/tools/psb_dgelp.f90 b/src/tools/psb_dgelp.f90 new file mode 100644 index 00000000..82ea46cd --- /dev/null +++ b/src/tools/psb_dgelp.f90 @@ -0,0 +1,238 @@ +! File: psb_dgelp.f90 +! +! Subroutine: psb_dgelp +! ??????????? +! +! Parameters: +! trans - character. +! iperm - integer. +! x - real, dimension(:,:). +! info - integer. Eventually returns an error code. +subroutine psb_dgelp(trans,iperm,x,desc_a,info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(inout) :: x(:,:) + integer, intent(inout) :: iperm(:),info + character, intent(in) :: trans + + ! local variables + integer :: err, icontxt,nprow, & + & npcol,me,mypcol,temp,lwork,nrow,ncol + real(kind(1.d0)),pointer :: dtemp(:) + integer :: int_err(5), i1sz, i2sz, dectype, i, err_act + character(len=20) :: itrans + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + + interface dgelp + subroutine dgelp(trans,m,n,p,b,ldb,work,lwork,ierror) + integer, intent(in) :: ldb, m, n, lwork + integer, intent(out) :: ierror + character, intent(in) :: trans + double precision, intent(inout) :: b(ldb,*), work(*) + integer, intent(in) :: p(*) + end subroutine dgelp + end interface + + interface isaperm + + logical function isaperm(n,ip) + integer, intent(in) :: n + integer, intent(inout) :: ip(*) + end function isaperm + end interface + + character(len=20) :: name, ch_err + name = 'psb_dgelp' + + info=0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + + if (debug) write(*,*) 'asb start: ',nprow,npcol,me,& + &desc_a%matrix_data(psb_dec_type_) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + else if (.not.psb_is_asb_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + + if (.not.isaperm(i1sz,iperm)) then + info = 70 + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (debug) write(*,*) 'asb: ',i1sz,i2sz,nrow,ncol + allocate(dtemp(i1sz),stat=info) + + call dgelp(trans,i1sz,i2sz,iperm,x,i1sz,dtemp,i1sz,info) + if(info.ne.0) then + info=4010 + ch_err='dgelp' + call psb_errpush(info,name,a_err=ch_err) + end if + + deallocate(dtemp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dgelp + + + +! Subroutine: psb_dgelpv +! ??????????? +! +! Parameters: +! trans - character. +! iperm - integer. +! x - real, dimension(:). +! info - integer. Eventually returns an error code. +subroutine psb_dgelpv(trans,iperm,x,desc_a,info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)), intent(inout) :: x(:) + integer, intent(inout) :: iperm(:), info + character, intent(in) :: trans + + ! local variables + integer :: err, icontxt,nprow,npcol,me,mypcol,temp,lwork + integer :: int_err(5), i1sz,nrow,ncol,dectype, i, err_act + real(kind(1.d0)),pointer :: dtemp(:) + double precision :: real_err(5) + character :: itrans + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + + interface dgelp + subroutine dgelp(trans,m,n,p,b,ldb,work,lwork,ierror) + integer, intent(in) :: ldb, m, n, lwork + integer, intent(out) :: ierror + character, intent(in) :: trans + double precision, intent(inout) :: b(*), work(*) + integer, intent(in) :: p(*) + end subroutine dgelp + end interface + + interface isaperm + + logical function isaperm(n,ip) + integer, intent(in) :: n + integer, intent(inout) :: ip(*) + end function isaperm + end interface + + character(len=20) :: name, ch_err + name = 'psb_dgelpv' + + info=0 + call psb_erractionsave(err_act) + + i1sz = size(x) + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + nrow=desc_a%matrix_data(psb_n_row_) + ncol=desc_a%matrix_data(psb_n_col_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + else if (.not.psb_is_asb_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) write(0,*) 'calling isaperm ',i1sz,size(iperm),trans + + if (.not.isaperm(i1sz,iperm)) then + info = 70 + int_err(1) = 1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + allocate(dtemp(i1sz),stat=info) + + call dgelp(trans,i1sz,1,iperm,x,i1sz,dtemp,i1sz,info) + if(info.ne.0) then + info=4010 + ch_err='dgelp' + call psb_errpush(info,name,a_err=ch_err) + end if + + deallocate(dtemp) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dgelpv + diff --git a/src/tools/psb_dins.f90 b/src/tools/psb_dins.f90 new file mode 100644 index 00000000..083d2e26 --- /dev/null +++ b/src/tools/psb_dins.f90 @@ -0,0 +1,480 @@ +! File: psb_dins.f90 +! +! Subroutine: psb_dins +! Insert dense submatrix to dense matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! n - integer. Cols number of submatrix belonging to blck to be inserted. +! x - real, pointer, dimension(:,:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! jx - integer. x global-col corresponding to position at which blck submatrix must be inserted. +! blck - real, pointer, dimension(:,:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +! jblck - integer(optional). First col of submatrix belonging to blck to be inserted. +subroutine psb_dins(m, n, x, ix, jx, blck, desc_a, info,& + & iblck, jblck) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + + !....parameters... + integer, intent(in) :: m,n + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:,:) + integer, intent(in) :: ix,jx + real(kind(1.d0)), intent(in) :: blck(:,:) + integer,intent(out) :: info + integer, optional, intent(in) :: iblck,jblck + + !locals..... + + integer :: icontxt,i,loc_row,glob_row,row,k,err_act,& + & nprocs,mode, loc_cols,col,iblock, jblock, mglob, int_err(5), err + integer :: nprow,npcol, me ,mypcol + character :: temp_descra*11,temp_fida*5 + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dins' + + if (.not.associated(desc_a%glob_to_loc)) then + info=3110 + call psb_errpush(info,name) + return + end if + if ((.not.associated(desc_a%matrix_data))) then + int_err(1)=3110 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !... check parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (n.lt.0) then + info = 10 + int_err(1) = 2 + int_err(2) = n + call psb_errpush(info,name,int_err) + goto 9999 + else if (ix.lt.1) then + info = 20 + int_err(1) = 6 + int_err(2) = ix + call psb_errpush(info,name,int_err) + goto 9999 + else if (jx.lt.1) then + info = 20 + int_err(1) = 7 + int_err(2) = jx + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.psb_is_ok_dec(desc_a%matrix_data(psb_dec_type_))) then + info = 3110 + int_err(1) = desc_a%matrix_data(psb_dec_type_) + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=1).lt.desc_a%matrix_data(psb_n_row_)) then + info = 310 + int_err(1) = 5 + int_err(2) = 4 + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=2).lt.n) then + ! check if dimension of x is greater than dimension of submatrix + ! to insert + info = 320 + int_err(1) = 2 + int_err(2) = size(x, dim=2) + int_err(3) = n + call psb_errpush(info,name,int_err) + goto 9999 + endif + + loc_cols = desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(psb_m_) + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + + if (present(jblck)) then + jblock = jblck + else + jblock = 1 + endif + + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + do col = 1, n + x(loc_row,jx+col-1) = blck(iblock+i-1,jblock+col-1) + enddo + end if + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dins + + + + + +! Subroutine: psb_dinsvm +! Insert dense submatrix to dense matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! x - real, pointer, dimension(:,:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! jx - integer. x global-col corresponding to position at which blck submatrix must be inserted. +! blck - real, pointer, dimension(:,:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +subroutine psb_dinsvm(m, x, ix, jx, blck, desc_a,info,& + & iblck) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + ! m rows number of submatrix belonging to blck to be inserted + + ! iblck first row of submatrix belonging to blck to be inserted + + ! ix x global-row corresponding to position at which blck submatrix + ! must be inserted + + ! jx x global-col corresponding to position at which blck submatrix + ! must be inserted + + !....parameters... + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:,:) + integer, intent(in) :: ix,jx + real(kind(1.d0)), intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + + !locals..... + integer :: icontxt,i,loc_row,glob_row,loc_cols,mglob,err_act, int_err(5),err + integer :: nprow,npcol, me ,mypcol, iblock + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dinsvm' + + if (.not.associated(desc_a%glob_to_loc)) then + info=3110 + call psb_errpush(info,name) + return + end if + if ((.not.associated(desc_a%matrix_data))) then + int_err(1)=3110 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !... check parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (ix.lt.1) then + info = 20 + int_err(1) = 6 + int_err(2) = ix + call psb_errpush(info,name,int_err) + goto 9999 + else if (jx.lt.1) then + info = 20 + int_err(1) = 7 + int_err(2) = jx + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.psb_is_ok_dec(desc_a%matrix_data(psb_dec_type_))) then + info = 3110 + int_err(1) = desc_a%matrix_data(psb_dec_type_) + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=1).lt.desc_a%matrix_data(psb_n_row_)) then + info = 310 + int_err(1) = 5 + int_err(2) = 4 + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=2).lt.1) then + ! check if dimension of x is greater than dimension of submatrix + ! to insert + info = 320 + int_err(1) = 2 + int_err(2) = size(x, dim=2) + int_err(3) = 1 + call psb_errpush(info,name,int_err) + goto 9999 + endif + + loc_cols=desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(psb_m_) + + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + x(loc_row,jx) = blck(iblock+i-1) + end if + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dinsvm + + + +! Subroutine: psb_dinsvv +! Insert dense submatrix to dense matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! x - real, pointer, dimension(:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! blck - real, pointer, dimension(:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +! insflag - integer(optional). ??? +subroutine psb_dinsvv(m, x, ix, blck, desc_a, info,& + & iblck,insflag) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + ! m rows number of submatrix belonging to blck to be inserted + + ! iblck first row of submatrix belonging to blck to be inserted + + ! ix x global-row corresponding to position at which blck submatrix + ! must be inserted + + !....parameters... + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + real(kind(1.d0)),pointer :: x(:) + integer, intent(in) :: ix + real(kind(1.d0)), intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + integer, optional, intent(in) :: insflag + + !locals..... + integer :: icontxt,i,loc_row,glob_row,row,k,& + & loc_rows,loc_cols,iblock, liflag,mglob,err_act, int_err(5), err + integer :: nprow,npcol, me ,mypcol + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dinsvv' + + if (.not.associated(desc_a%glob_to_loc)) then + info=3110 + call psb_errpush(info,name) + return + end if + if ((.not.associated(desc_a%matrix_data))) then + int_err(1)=3110 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !... check parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (ix.lt.1) then + info = 20 + int_err(1) = 6 + int_err(2) = ix + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.psb_is_ok_dec(desc_a%matrix_data(psb_dec_type_))) then + info = 3110 + int_err(1) = desc_a%matrix_data(psb_dec_type_) + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=1).lt.desc_a%matrix_data(psb_n_row_)) then + info = 310 + int_err(1) = 5 + int_err(2) = 4 + call psb_errpush(info,name,int_err) + goto 9999 + endif + + loc_rows=desc_a%matrix_data(psb_n_row_) + loc_cols=desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(psb_m_) + + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + if (present(insflag)) then + liflag = insflag + else + liflag = psb_upd_glb_ + end if + + if (liflag == psb_upd_glb_) then + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + x(loc_row) = blck(iblock+i-1) + end if + enddo + else if (liflag == psb_upd_loc_) then + k = min(ix+m-1,loc_rows) + do i=ix,k + x(i) = blck(i-ix+1) + enddo + else + info=-1 + call psb_errpush(info,name) + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dinsvv + + diff --git a/src/tools/psb_dptins.f90 b/src/tools/psb_dptins.f90 new file mode 100644 index 00000000..945795a7 --- /dev/null +++ b/src/tools/psb_dptins.f90 @@ -0,0 +1,297 @@ +! File: psb_dptins.f90 +! +! Subroutine: psb_dptins +! insert sparse submatrix to sparse matrix structure for psblas +! routines +! +! Parameters: +! ia - integer. a global-row corresponding to position at which blck submatrix must be inserted. +! ja - integer. a global-col corresponding to position at which blck submatrix must be inserted. +! blck - type(). The source sparse submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dptins(ia,ja,blck,desc_a,info) + use psb_descriptor_type + use psb_spmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(in) :: ia,ja + type(psb_dspmat_type), intent(in) :: blck + integer,intent(out) :: info + + !locals..... + + interface + subroutine dcsins(m,n,fida,descra,a,ia1,ia2,infoa,& + & ia,ja,latot,lia1tot,lia2tot,& + &fidh,descrh,h,ih1,ih2,infoh,ih,jh,work,lwork,ierror) + implicit none + ! .. scalar arguments .. + integer, intent(in) :: m, n, lwork, latot,lia1tot,lia2tot,ia,ja,ih,jh + integer, intent(out) :: ierror + ! .. array arguments .. + double precision, intent(in) :: h(*) + double precision, intent(inout) :: a(*), work(*) + integer, intent(in) :: ih1(*), ih2(*), infoh(10) + integer, intent(inout) :: ia1(*), ia2(*), infoa(10) + character, intent(in) :: fida*5, fidh*5,descra*11, descrh*11 + end subroutine dcsins + end interface + + integer :: i,icontxt,nprocs ,glob_row,row,& + & k ,start_row,end_row,int_err(5),& + & first_loc_row,n_row,j, ierror,locix,locjx,& + & allocated_prcv,dectype,mglob, nnza, err_act + integer,pointer :: prcv(:), tia1(:),tia2(:), temp(:) + integer :: nprow,npcol, me ,mypcol, iflag, isize, irlc + integer :: m,n, pnt_halo,ncol, nh, ip + type(psb_dspmat_type) :: a + real(kind(1.d0)),pointer :: workarea(:),taspk(:) + logical, parameter :: debug=.false. + integer, parameter :: nrlcthr=3 + integer, save :: irlcmin,nrlc + data irlcmin/500/,nrlc/0/ + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dptins' + + locix=1 + locjx=1 + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + mglob = desc_a%matrix_data(psb_m_) + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.psb_is_bld_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + allocate(workarea(1),prcv(nprow),stat=info) + if (info.ne.0) then + info = 2023 + call psb_errpush(info,name) + goto 9999 + end if + call psb_spall(a,size(blck%aspk),info) + if (info.ne.0) then + info = 4010 + ch_err='spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + allocated_prcv = 1 + a%infoa(:) = 0 + a%fida = 'coo' + a%descra = 'gun' + n_row = desc_a%matrix_data(psb_n_row_) + nnza = a%infoa(psb_nnz_) + m = blck%m + n = blck%k + row = ia + i = 1 + do while (i.le.m) + !loop over all blck's rows + ! row actual block row + + row = locix+i-1 + glob_row = ia+i-1 + if (glob_row > mglob) exit + if (debug) then + write(0,*) 'ptins: inserting ',glob_row + endif + k = desc_a%glob_to_loc(glob_row) + if (k.gt.0) then + start_row = row + first_loc_row = k +!!$ do while ((i.lt.m).and.& +!!$ & (desc_a%glob_to_loc(ia+i).gt.0)) +!!$ i=i+1 +!!$ enddo + do + if (i>=m) exit + if ((ia+i)>mglob) exit + if (desc_a%glob_to_loc(ia+i) <=0 ) exit + i=i+1 + enddo + + end_row=locix+i-1 + ! insert blck submatrix in 'coo' format + call dcsins(end_row-start_row+1,n,a%fida,a%descra,a%aspk,& + & a%ia1,a%ia2,a%infoa,first_loc_row, ja,& + & size(a%aspk),size(a%ia1),size(a%ia2),& + & blck%fida,blck%descra,blck%aspk,blck%ia1,blck%ia2,& + & blck%infoa,start_row,locjx,workarea,size(workarea),& + & info) + + if (info.ne.0) then + + if (info.eq.60) then + ! try reallocating + irlc = irlcmin + do while (info.eq.60) + if (debug) write(*,*) "attempting reallocation with",irlc + + isize = size(a%ia1) + allocate(tia1(isize+irlc),stat=info) + if (info.ne.0) goto 9998 + tia1(1:isize) = a%ia1(1:isize) + deallocate(a%ia1,stat=info) + if (info.ne.0) goto 9998 + a%ia1 => tia1 + nullify(tia1) + + isize = size(a%ia2) + allocate(tia2(isize+irlc),stat=info) + if (info.ne.0) goto 9998 + tia2(1:isize) = a%ia2(1:isize) + deallocate(a%ia2,stat=info) + if (info.ne.0) goto 9998 + a%ia2 => tia2 + nullify(tia2) + + isize = size(a%aspk) + allocate(taspk(isize+irlc),stat=info) + if (info.ne.0) goto 9998 + taspk(1:isize) = a%aspk(1:isize) + deallocate(a%aspk,stat=info) + if (info.ne.0) goto 9998 + a%aspk => taspk + nullify(taspk) + +9998 if (info.ne.0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + ! insert blck submatrix in 'coo' format + call dcsins(end_row-start_row+1,n,a%fida,a%descra,a%aspk,& + & a%ia1,a%ia2,a%infoa,first_loc_row, ja,& + & size(a%aspk), size(a%ia1),size(a%ia2),& + & blck%fida,blck%descra,blck%aspk,blck%ia1,blck%ia2,& + & blck%infoa,start_row, locjx,workarea,size(workarea),& + & info) + + if (info.eq.60) irlc = irlc*2 + enddo + ! if we get here, it means we succesfully reallocated. + nrlc = nrlc+1 + if (nrlc .ge. nrlcthr) then + nrlc = 0 + irlcmin = irlcmin * 2 + endif + + else + info = 4010 + ch_err='spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + endif + endif + ! next blck's row + i=i+1 + enddo + + + if (.not.associated(desc_a%halo_index)) then + allocate(desc_a%halo_index(irlcmin)) + desc_a%halo_index(:) = -1 + endif + pnt_halo=1 + do while (desc_a%halo_index(pnt_halo) .ne. -1 ) + pnt_halo = pnt_halo + 1 + end do + ncol = desc_a%matrix_data(psb_n_col_) + + isize = size(desc_a%halo_index) + do i = nnza+1,a%infoa(psb_nnz_) + ip = a%ia2(i) + k = desc_a%glob_to_loc(ip) + if (k.lt.-nprow) then + k = k + nprow + k = - k - 1 + ncol = ncol + 1 + desc_a%glob_to_loc(ip) = ncol + isize = size(desc_a%loc_to_glob) + if (ncol > isize) then + nh = ncol + irlcmin + call psb_realloc(nh,desc_a%loc_to_glob,info,pad=-1) + if (me==0) then + if (debug) write(0,*) 'done realloc ',nh + end if + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + isize = nh + endif + desc_a%loc_to_glob(ncol) = ip + isize = size(desc_a%halo_index) + if ((pnt_halo+3).gt.isize) then + nh = isize + irlcmin + call psb_realloc(nh,desc_a%halo_index,info,pad=-1) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + isize = nh + endif + desc_a%halo_index(pnt_halo) = k + desc_a%halo_index(pnt_halo+1) = 1 + desc_a%halo_index(pnt_halo+2) = ncol + pnt_halo = pnt_halo + 3 + endif + enddo + + desc_a%matrix_data(psb_n_col_) = ncol + + + + if (allocated_prcv.eq.1) then + call psb_spfree(a,info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + deallocate(prcv,workarea,stat=info) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return +end subroutine psb_dptins + diff --git a/src/tools/psb_dscall.f90 b/src/tools/psb_dscall.f90 new file mode 100644 index 00000000..2e42dcf7 --- /dev/null +++ b/src/tools/psb_dscall.f90 @@ -0,0 +1,305 @@ +! File: psb_dscall.f90 +! +! Subroutine: psb_dscall +! Allocate descriptor +! and checks correctness of PARTS subroutine +! +! Parameters: +! m - integer. The number of rows. +! n - integer. The number of columns. +! parts - external subroutine. The routine that contains the partitioning scheme. +! icontxt - integer. The communication context. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_dscall(m, n, parts, icontxt, desc_a, info) + use psb_error_mod + use psb_descriptor_type + use psb_realloc_mod + use psb_serial_mod + use psb_const_mod + use psb_parts_mod + implicit None + !....Parameters... + Integer, intent(in) :: M,N,ICONTXT + Type(psb_desc_type), intent(out) :: desc_a + integer, intent(out) :: info + + !locals + Integer :: counter,i,j,nprow,npcol,me,mypcol,& + & loc_row,err,loc_col,nprocs,& + & l_ov_ix,l_ov_el,idx, err_act, itmpov, k + Integer :: INT_ERR(5),TEMP(1),EXCH(2) + Real(Kind(1.d0)) :: REAL_ERR(5) + Integer, Pointer :: PRC_V(:), TEMP_OVRLAP(:), OV_IDX(:),OV_EL(:) + logical, parameter :: debug=.false. + character(len=20) :: name, char_err + + info=0 + err=0 + name = 'psb_dscall' + call psb_erractionsave(err_act) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (debug) write(*,*) 'psb_dscall: ',nprow,npcol,me,mypcol + ! ....verify blacs grid correctness.. + if (npcol /= 1) then + info = 2030 + err=info + int_err(1) = npcol + call psb_errpush(err,name,int_err) + goto 9999 + endif + + + !... check m and n parameters.... + if (m.lt.1) then + info = 10 + err=info + int_err(1) = 1 + int_err(2) = m + call psb_errpush(err,name,int_err) + goto 9999 + else if (n.lt.1) then + info = 10 + err=info + int_err(1) = 2 + int_err(2) = n + call psb_errpush(err,name,int_err) + goto 9999 + endif + + + if (debug) write(*,*) 'psb_dscall: doing global checks' + !global check on m and n parameters + if (me.eq.root) then + exch(1)=m + exch(2)=n + call igebs2d(icontxt,psb_all_,psb_topdef_, itwo,ione, exch, itwo) + else + call igebr2d(icontxt,psb_all_,psb_topdef_, itwo,ione, exch, itwo, root,& + & 0) + if (exch(1) /= m) then + err=550 + int_err(1)=1 + call psb_errpush(err,name,int_err) + goto 9999 + else if (exch(2) /= n) then + err=550 + int_err(1)=2 + call psb_errpush(err,name,int_err) + goto 9999 + endif + endif + + call psb_nullify_desc(desc_a) + + !count local rows number + ! allocate work vector + allocate(prc_v(nprow),desc_a%glob_to_loc(m),& + &desc_a%matrix_data(psb_mdata_size_),temp_ovrlap(m),stat=info) + if (info /= no_err) then + info=2025 + err=info + int_err(1)=m + call psb_errpush(err,name,int_err) + goto 9999 + endif + + + if (debug) write(*,*) 'PSB_DSCALL: starting main loop' ,info + counter = 0 + itmpov = 0 + temp_ovrlap(:) = -1 + do i=1,m + if (info.eq.0) then + call parts(i,m,nprow,prc_v,nprocs) + if (nprocs.gt.nprow) then + info=570 + int_err(1)=3 + int_err(2)=nprow + int_err(3)=nprocs + int_err(4)=i + err=info + call psb_errpush(err,name,int_err) + goto 9999 + else if (nprocs.le.0) then + info=575 + int_err(1)=3 + int_err(2)=nprocs + int_err(3)=i + err=info + call psb_errpush(err,name,int_err) + goto 9999 + else + do j=1,nprocs + if ((prc_v(j).gt.nprow-1).or.(prc_v(j).lt.0)) then + info=580 + int_err(1)=3 + int_err(2)=prc_v(j) + int_err(3)=i + err=info + call psb_errpush(err,name,int_err) + goto 9999 + end if + end do + endif + desc_a%glob_to_loc(i) = -(nprow+prc_v(1)+1) + j=1 +!!$ do while ((j.le.nprocs).and.(prc_v(j).ne.me)) + do + if (j > nprocs) exit + if (prc_v(j) == me) exit + j=j+1 + enddo + if (j.le.nprocs) then + if (prc_v(j).eq.me) then + ! this point belongs to me + counter=counter+1 + desc_a%glob_to_loc(i) = counter + if (nprocs.gt.1) then + if ((itmpov+2+nprocs).gt.m) then + info=2025 + int_err(1)=m + err=info + call psb_errpush(err,name,int_err) + goto 9999 + else + itmpov = itmpov + 1 + temp_ovrlap(itmpov) = i + itmpov = itmpov + 1 + temp_ovrlap(itmpov) = nprocs + temp_ovrlap(itmpov+1:itmpov+nprocs) = prc_v(1:nprocs) + itmpov = itmpov + nprocs + endif + endif + end if + end if + endif + enddo + + loc_row=counter + ! check on parts function + if (debug) write(*,*) 'PSB_DSCALL: End main loop:' ,loc_row,itmpov,info + + + if (debug) write(*,*) 'PSB_DSCALL: error check:' ,err + + l_ov_ix=0 + l_ov_el=0 + i = 1 + do while (temp_ovrlap(i) /= -1) + idx = temp_ovrlap(i) + i=i+1 + nprocs = temp_ovrlap(i) + i = i + 1 + l_ov_ix = l_ov_ix+3*(nprocs-1) + l_ov_el = l_ov_el + 2 + i = i + nprocs + enddo + + l_ov_ix = l_ov_ix+3 + l_ov_el = l_ov_el+3 + + if (debug) write(*,*) 'PSB_DSCALL: Ov len',l_ov_ix,l_ov_el + call psb_realloc(l_ov_ix,ov_idx,info) + call psb_realloc(l_ov_el,ov_el,info) + if (info /= no_err) then + info=4010 + char_err='psb_realloc' + err=info + call psb_errpush(err,name,a_err=char_err) + goto 9999 + end if + + l_ov_ix=0 + l_ov_el=0 + i = 1 + do while (temp_ovrlap(i) /= -1) + idx = temp_ovrlap(i) + i = i+1 + nprocs = temp_ovrlap(i) + ov_el(l_ov_el+1) = idx + ov_el(l_ov_el+2) = nprocs + l_ov_el = l_ov_el+2 + do j=1, nprocs + if (temp_ovrlap(i+j) /= me) then + ov_idx(l_ov_ix+1) = temp_ovrlap(i+j) + ov_idx(l_ov_ix+2) = 1 + ov_idx(l_ov_ix+3) = idx + l_ov_ix = l_ov_ix+3 + endif + enddo + i = i + nprocs +1 + enddo + l_ov_el = l_ov_el + 1 + ov_el(l_ov_el) = -1 + l_ov_ix = l_ov_ix + 1 + ov_idx(l_ov_ix) = -1 + + desc_a%ovrlap_index => ov_idx + desc_a%ovrlap_elem => ov_el + deallocate(prc_v,temp_ovrlap,stat=info) + if (info /= no_err) then + info=4000 + err=info + call psb_errpush(err,name) + Goto 9999 + endif + ! estimate local cols number + loc_col=int((psb_colrow_+1.d0)*loc_row)+1 + allocate(desc_a%loc_to_glob(loc_col),& + &desc_a%lprm(1),stat=info) + call psb_realloc(1, desc_a%lprm, info) + call psb_realloc(loc_col, desc_a%loc_to_glob, info) + if (info /= no_err) then + info=2025 + char_err='psb_realloc' + call psb_errpush(err,name,a_err=char_err) + Goto 9999 + end if + + ! set LOC_TO_GLOB array to all "-1" values + desc_a%lprm(1) = 0 + desc_a%loc_to_glob(:) = -1 + do i=1,m + k = desc_a%glob_to_loc(i) + if (k.gt.0) then + desc_a%loc_to_glob(k) = i + endif + enddo + nullify(desc_a%bnd_elem,desc_a%halo_index) + +!!$ if (debug) write(*,*) 'PSB_DSCALL: Last bits in desc_a', loc_row,k + ! set fields in desc_a%MATRIX_DATA.... + desc_a%matrix_data(psb_n_row_) = loc_row + desc_a%matrix_data(psb_n_col_) = loc_row + + call psb_realloc(1,desc_a%halo_index, info) + if (info /= no_err) then + info=2025 + char_err='psb_realloc' + call psb_errpush(err,name,a_err=char_err) + Goto 9999 + end if + + desc_a%halo_index(:) = -1 + + + desc_a%matrix_data(psb_m_) = m + desc_a%matrix_data(psb_n_) = n + desc_a%matrix_data(psb_dec_type_) = psb_desc_bld_ + desc_a%matrix_data(psb_ctxt_) = icontxt + call blacs_get(icontxt,10,desc_a%matrix_data(psb_mpi_c_)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscall diff --git a/src/tools/psb_dscalv.f90 b/src/tools/psb_dscalv.f90 new file mode 100644 index 00000000..c33d37e8 --- /dev/null +++ b/src/tools/psb_dscalv.f90 @@ -0,0 +1,268 @@ +! File: psb_dscalv.f90 +! +! Subroutine: psb_dscalv +! Allocate descriptor +! and checks correctness of PARTS subroutine +! +! Parameters: +! m - integer. The number of rows. +! v - integer, dimension(:). The array containg the partitioning scheme. +! icontxt - integer. The communication context. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! flag - integer. ??? +subroutine psb_dscalv(m, v, icontxt, desc_a, info, flag) + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit None + !....Parameters... + Integer, intent(in) :: m,icontxt, v(:) + integer, intent(in), optional :: flag + integer, intent(out) :: info + type(psb_desc_type), intent(out) :: desc_a + + !locals + Integer :: counter,i,j,nprow,npcol,me,mypcol,& + & loc_row,err,loc_col,nprocs,n,itmpov, k,& + & l_ov_ix,l_ov_el,idx, flag_, err_act + Integer :: INT_ERR(5),TEMP(1),EXCH(2) + Real(Kind(1.d0)) :: REAL_ERR(5) + Integer, Pointer :: temp_ovrlap(:), ov_idx(:),ov_el(:) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + err=0 + name = 'psb_dscalv' + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (debug) write(*,*) 'psb_dscall: ',nprow,npcol,me,mypcol + ! ....verify blacs grid correctness.. + if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + n = m + !... check m and n parameters.... + if (m < 1) then + info = 10 + int_err(1) = 1 + int_err(2) = m + else if (n < 1) then + info = 10 + int_err(1) = 2 + int_err(2) = n + else if (size(v)1)) then + info = 6 + err=info + call psb_errpush(info,name) + goto 9999 + end if + + !count local rows number + ! allocate work vector + allocate(desc_a%glob_to_loc(m),desc_a%matrix_data(psb_mdata_size_),& + &temp_ovrlap(m),stat=info) + if (info /= 0) then + info=2025 + int_err(1)=m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + + if (debug) write(*,*) 'PSB_DSCALL: starting main loop' ,info + counter = 0 + itmpov = 0 + temp_ovrlap(:) = -1 + do i=1,m + + if (((v(i)-flag_) > nprow-1).or.((v(i)-flag_) < 0)) then + info=580 + int_err(1)=3 + int_err(2)=v(i) - flag_ + int_err(3)=i + exit + end if + + if ((v(i)-flag_) == me) then + ! this point belongs to me + counter=counter+1 + desc_a%glob_to_loc(i) = counter + else + desc_a%glob_to_loc(i) = -(nprow+(v(i)-flag_)+1) + end if + enddo + + loc_row=counter + ! check on parts function + if (debug) write(*,*) 'PSB_DSCALL: End main loop:' ,loc_row,itmpov,info + + if (info /= 0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (debug) write(*,*) 'PSB_DSCALL: error check:' ,err + + l_ov_ix=0 + l_ov_el=0 + i = 1 + do while (temp_ovrlap(i) /= -1) + idx = temp_ovrlap(i) + i=i+1 + nprocs = temp_ovrlap(i) + i = i + 1 + l_ov_ix = l_ov_ix+3*(nprocs-1) + l_ov_el = l_ov_el + 2 + i = i + nprocs + enddo + + l_ov_ix = l_ov_ix+3 + l_ov_el = l_ov_el+3 + + if (debug) write(*,*) 'PSB_DSCALL: Ov len',l_ov_ix,l_ov_el + allocate(ov_idx(l_ov_ix),ov_el(l_ov_el), stat=info) + if (info /= 0) then + info=2025 + int_err(1)=loc_col + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + l_ov_ix=0 + l_ov_el=0 + i = 1 + do while (temp_ovrlap(i) /= -1) + idx = temp_ovrlap(i) + i = i+1 + nprocs = temp_ovrlap(i) + ov_el(l_ov_el+1) = idx + ov_el(l_ov_el+2) = nprocs + l_ov_el = l_ov_el+2 + do j=1, nprocs + if (temp_ovrlap(i+j) /= me) then + ov_idx(l_ov_ix+1) = temp_ovrlap(i+j) + ov_idx(l_ov_ix+2) = 1 + ov_idx(l_ov_ix+3) = idx + l_ov_ix = l_ov_ix+3 + endif + enddo + i = i + nprocs +1 + enddo + l_ov_el = l_ov_el + 1 + ov_el(l_ov_el) = -1 + l_ov_ix = l_ov_ix + 1 + ov_idx(l_ov_ix) = -1 + + desc_a%ovrlap_index => ov_idx + desc_a%ovrlap_elem => ov_el + deallocate(temp_ovrlap,stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + endif + + ! estimate local cols number + loc_col=int((psb_colrow_+1.d0)*loc_row)+1 + allocate(desc_a%loc_to_glob(loc_col),& + &desc_a%lprm(1),stat=info) + if (info /= 0) then + info=2025 + int_err(1)=loc_col + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + ! set LOC_TO_GLOB array to all "-1" values + desc_a%lprm(1) = 0 + desc_a%loc_to_glob(:) = -1 + do i=1,m + k = desc_a%glob_to_loc(i) + if (k.gt.0) then + desc_a%loc_to_glob(k) = i + endif + enddo + nullify(desc_a%bnd_elem,desc_a%halo_index) + +!!$ if (debug) write(*,*) 'PSB_DSCALL: Last bits in desc_a', loc_row,k + ! set fields in desc_a%MATRIX_DATA.... + desc_a%matrix_data(psb_n_row_) = loc_row + desc_a%matrix_data(psb_n_col_) = loc_row + + allocate(desc_a%halo_index(1),stat=info) + if (info /= 0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + endif + + desc_a%halo_index(:) = -1 + + + desc_a%matrix_data(psb_m_) = m + desc_a%matrix_data(psb_n_) = n + desc_a%matrix_data(psb_dec_type_) = psb_desc_bld_ + desc_a%matrix_data(psb_ctxt_) = icontxt + call blacs_get(icontxt,10,desc_a%matrix_data(psb_mpi_c_)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscalv diff --git a/src/tools/psb_dscasb.f90 b/src/tools/psb_dscasb.f90 new file mode 100644 index 00000000..5a9e1a0e --- /dev/null +++ b/src/tools/psb_dscasb.f90 @@ -0,0 +1,200 @@ +! File: psb_dscasb.f90 +! +! Subroutine: psb_dscasb +! Assembly the psblas communications descriptor. +! +! Parameters: +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +subroutine psb_dscasb(desc_a,info) + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + !...Parameters.... + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + + + !....Locals.... + integer :: int_err(5), itemp(2) + integer,pointer :: ovrlap_index(:),halo_index(:) + integer :: i,err,nprow,npcol,me,mypcol,& + & lovrlap,lhalo,nhalo,novrlap,max_size,max_halo,n_col,ldesc_halo,& + & ldesc_ovrlap, dectype, err_act + integer :: icontxt,temp(1),n_row + integer, parameter :: ione=1, itwo=2 + logical, parameter :: debug=.false., debugwrt=.false. + character(len=20) :: name,ch_err + + info = 0 + int_err(1) = 0 + name = 'psb_dscasb' + + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow == -1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.is_ok_dec(dectype)) then + info = 600 + int_err(1) = dectype + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) write (0, *) ' Begin matrix assembly...' + + if (is_bld_dec(dectype)) then + if (debug) write(0,*) 'psb_dscasb: Checking rows insertion' + ! check if all local row are inserted + do i=1,desc_a%matrix_data(psb_n_col_) + if (desc_a%loc_to_glob(i) < 0) then + info=3100 + exit + endif + enddo + + if (info /= no_err) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + + ! comm desc_size is size requested for temporary comm descriptors + ! (expressed in No of dble element) + ldesc_halo = (((3*(n_col-n_row)+1)+1)) + ovrlap_index => desc_a%ovrlap_index + nullify(desc_a%ovrlap_index) + halo_index => desc_a%halo_index + nullify(desc_a%halo_index) + + lhalo = 1 + do while (halo_index(lhalo) /= -1) + lhalo = lhalo + 1 + enddo + nhalo = (lhalo-1)/3 + lovrlap=1 + do while (ovrlap_index(lovrlap) /= -1) + lovrlap=lovrlap+1 + enddo + novrlap = (lovrlap-1)/3 + + ! Allocate final comm PSBLAS descriptors + ! compute necessary dimension of halo index + max_halo = max(nhalo,1) + max_size = max(1,min(3*desc_a%matrix_data(psb_n_row_),novrlap*3)) + + itemp(1) = max_size + itemp(2) = max_halo + call igamx2d(icontxt, all, topdef, itwo, ione, itemp,& + & itwo,temp ,temp,-ione ,-ione,-ione) + max_size = itemp(1) + max_halo = itemp(2) + + ldesc_halo = 3*max_halo+3*nhalo+1 + + ! allocate HALO_INDEX field + call psb_realloc(ldesc_halo, desc_a%halo_index, info) + ! check on allocate + if (info /= no_err) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + ! compute necessary dimension of ovrlap index + ldesc_ovrlap = 2*lovrlap+1 + + ! allocate OVRLAP_INDEX field + call psb_realloc(ldesc_ovrlap, desc_a%ovrlap_index, info) + ! check on allocate + if (info /= no_err) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + + + if (debug) write(0,*) 'psb_dscasb: converting indexes',& + & nhalo,lhalo,halo_index(lhalo) + !.... convert comunication stuctures.... + ! first the halo index + call psi_crea_index(desc_a,halo_index,& + & desc_a%halo_index,.false.,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_index') + goto 9999 + end if + + ! then the overlap index + call psi_crea_index(desc_a,ovrlap_index,& + & desc_a%ovrlap_index,.true.,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_index') + goto 9999 + end if + + ! next is the ovrlap_elem index + call psi_crea_ovr_elem(desc_a%ovrlap_index,desc_a%ovrlap_elem) + + ! finally bnd_elem + call psi_crea_bnd_elem(desc_a,info) + if(info.ne.0) then + call psb_errpush(4010,name,a_err='psi_crea_bnd_elem') + goto 9999 + end if + + ! Ok, register into MATRIX_DATA & free temporary work areas + desc_a%matrix_data(psb_dec_type_) = desc_asb + + deallocate(halo_index,ovrlap_index, stat=info) + if (info /= 0) then + info =4000 + call psb_errpush(info,name) + goto 9999 + end if + + else + info = 600 + call psb_errpush(info,name) + goto 9999 + if (debug) write(0,*) 'dectype 2 :',dectype,desc_bld,& + &desc_asb,desc_upd + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dscasb diff --git a/src/tools/psb_dsccpy.f90 b/src/tools/psb_dsccpy.f90 new file mode 100644 index 00000000..9debb2e7 --- /dev/null +++ b/src/tools/psb_dsccpy.f90 @@ -0,0 +1,206 @@ +! File: psb_dsccpy.f90 +! +! Subroutine: psb_dsccpy +! Produces a clone of a descriptor. +! +! Parameters: +! desc_out - type(). The output communication descriptor. +! desc_a - type(). The communication descriptor to be cloned. +! info - integer. Eventually returns an error code. +subroutine psb_dsccpy(desc_out, desc_a, info) + + use psb_descriptor_type + use psb_serial_mod + use psb_realloc_mod + use psb_const_mod + use psb_error_mod + + implicit none + !....parameters... + + type(psb_desc_type), intent(out) :: desc_out + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + !locals + integer :: nprow,npcol,me,mypcol,& + & icontxt, isz, dectype, err_act, err + integer :: int_err(5),temp(1) + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1, itwo=2,root=0 + logical, parameter :: debug=.false. + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dsccpy' + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + call psb_nullify_desc(desc_out) + + if (associated(desc_a%matrix_data)) then + isz = size(desc_a%matrix_data) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%matrix_data,info) + if(debug) write(0,*) 'dsccpy: m_data',isz,':',desc_a%matrix_data(:) + if (info.ne.0) then + info=4010 + char_err='psb_realloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%matrix_data(:) = desc_a%matrix_data(:) + endif + endif + + if (associated(desc_a%halo_index)) then + isz = size(desc_a%halo_index) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%halo_index,info) + if(debug) write(0,*) 'dsccpy: h_idx',isz,':',desc_a%halo_index(:) + if (info.ne.0) then + info=4010 + char_err='psb_realloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%halo_index(:) = desc_a%halo_index(:) + endif + endif + + + if (associated(desc_a%bnd_elem)) then + isz = size(desc_a%bnd_elem) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%bnd_elem,info) + if(debug) write(0,*) 'dsccpy: bnd_elem',isz,':',desc_a%bnd_elem(:) + if (info.ne.0) then + info=4010 + char_err='psb_realloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%bnd_elem(:) = desc_a%bnd_elem(:) + endif + endif + + + if (associated(desc_a%ovrlap_elem)) then + isz = size(desc_a%ovrlap_elem) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%ovrlap_elem,info) + if(debug) write(0,*) 'dsccpy: ovrlap_elem',isz,':',desc_a%ovrlap_elem(:) + if (info.ne.0) then + info=4010 + char_err='psrealloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%ovrlap_elem(:) = desc_a%ovrlap_elem(:) + endif + endif + + if (associated(desc_a%ovrlap_index)) then + isz = size(desc_a%ovrlap_index) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%ovrlap_index,info) + if(debug) write(0,*) 'dsccpy: ovrlap_index',isz,':',desc_a%ovrlap_index(:) + if (info.ne.0) then + info=4010 + char_err='psrealloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%ovrlap_index(:) = desc_a%ovrlap_index(:) + endif + endif + + + if (associated(desc_a%loc_to_glob)) then + isz = size(desc_a%loc_to_glob) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%loc_to_glob,info) + if(debug) write(0,*) 'dsccpy: loc_to_glob',isz,':',desc_a%loc_to_glob(:) + if (info.ne.0) then + info=4010 + char_err='psrealloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%loc_to_glob(:) = desc_a%loc_to_glob(:) + endif + endif + + if (associated(desc_a%glob_to_loc)) then + isz = size(desc_a%glob_to_loc) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%glob_to_loc,info) + if(debug) write(0,*) 'dsccpy: glob_to_loc',isz,':',desc_a%glob_to_loc(:) + if (info.ne.0) then + info=4010 + char_err='psrealloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%glob_to_loc(:) = desc_a%glob_to_loc(:) + endif + endif + + if (associated(desc_a%lprm)) then + isz = size(desc_a%lprm) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%lprm,info) + if(debug) write(0,*) 'dsccpy: lprm',isz,':',desc_a%lprm(:) + if (info.ne.0) then + info=4010 + char_err='psb_realloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%lprm(:) = desc_a%lprm(:) + endif + endif + + if (associated(desc_a%idx_space)) then + isz = size(desc_a%idx_space) + ! allocate(desc_out%matrix_data(isz),stat=info) + call psb_realloc(isz,desc_out%idx_space,info) + if(debug) write(0,*) 'dsccpy: idx_space',isz,':',desc_a%idx_space(:) + if (info.ne.0) then + info=4010 + char_err='psb_realloc' + call psb_errpush(info,name,a_err=char_err) + goto 9999 + else + desc_out%idx_space(:) = desc_a%idx_space(:) + endif + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dsccpy diff --git a/src/tools/psb_dscdec.f90 b/src/tools/psb_dscdec.f90 new file mode 100644 index 00000000..ef7ad22d --- /dev/null +++ b/src/tools/psb_dscdec.f90 @@ -0,0 +1,209 @@ +! --------------------------------------------------------------------- +! +! -- PSSBLAS routine (version 1.0) -- +! +! --------------------------------------------------------------------- +!/ +subroutine psb_dscdec(nloc, icontxt, desc_a, info) + + ! Purpose + ! ======= + ! + ! Allocate special descriptor for decoupled index space + ! + ! + ! + ! INPUT + !====== + ! NLOC : (Local Input) Integer + ! Number of local indices + ! required. + ! + ! ICONTXT : (Global Input)Integer BLACS context for an NPx1 grid + ! required. + ! + ! OUTPUT + !========= + ! desc_a : TYPEDESC + ! desc_a OUTPUT FIELDS: + ! + ! MATRIX_DATA : Pointer to integer Array + ! contains some + ! local and global information about matrix: + ! + ! NOTATION STORED IN EXPLANATION + ! ------------ ---------------------- ------------------------------------- + ! DEC_TYPE MATRIX_DATA[DEC_TYPE_] Decomposition type, temporarly is + ! setted to 1( matrix not yet assembled) + ! M MATRIX_DATA[M_] Total number of equations + ! N MATRIX_DATA[N_] Total number of variables + ! N_ROW MATRIX_DATA[N_ROW_] Number of local equations + ! N_COL MATRIX_DATA[N_COL_] Number of local columns (see below) + ! CTXT_A MATRIX_DATA[CTXT_] The BLACS context handle, + ! indicating + ! the global context of the operation + ! on the matrix. + ! The context itself is global. + ! + ! GLOB_TO_LOC Array of dimension equal to number of global + ! rows/cols (MATRIX_DATA[M_]). On exit, + ! for all global indices either: + ! 1. The index belongs to the current process; the entry + ! is set to the next free local row index. + ! 2. The index belongs to process P (0<=P<=NP-1); the entry + ! is set to + ! -(NP+P+1) + ! + ! LOC_TO_GLOB An array of dimension equal to number of local cols N_COL + ! i.e. all columns of the matrix such that there is at least + ! one nonzero entry within the local row range. At the time + ! this routine is called N_COL cannot be know, so we set + ! N_COL=N_ROW, and dimension this vector on N_ROW plus an + ! estimate. On exit the vector elements are set + ! to the index of the corresponding entry in GLOB_TO_LOC, or + ! to -1 for indices I>N_ROW. + ! + ! + ! HALO_INDEX Not touched here, as it depends on the matrix pattern + ! + ! OVRLAP_INDEX On exit from this routine, the overlap indices are stored in + ! triples (Proc, 1, Index), similar to the assembled format + ! but neither optimized, nor deadlock free. + ! List is terminated with -1 + ! + ! OVRLAP_ELEM On exit from this routine, just a list of pairs (index,#p). + ! List is terminated with -1. + ! + ! + ! END OF desc_a OUTPUT FIELDS + ! + ! + + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit None + !....Parameters... + Integer, intent(in) :: nloc,icontxt + integer, intent(out) :: info + Type(psb_desc_type), intent(out) :: desc_a + + !locals + Integer :: counter,i,j,nprow,npcol,me,mypcol,& + & loc_row,err,loc_col,nprocs,n,itmpov, k,& + & l_ov_ix,l_ov_el,idx, flag_, err_act,m, ip + Integer :: INT_ERR(5),TEMP(1),EXCH(2) + Real(Kind(1.d0)) :: REAL_ERR(5) + Integer, Parameter :: IONE=1, ITWO=2,ROOT=0 + Integer, Pointer :: temp_ovrlap(:), ov_idx(:), ov_el(:) + integer, allocatable :: nlv(:) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + err=0 + name = 'psb_dscrep' + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (debug) write(*,*) 'psb_dscall: ',nprow,npcol,me,mypcol + ! ....verify blacs grid correctness.. + if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + n = nloc + !... check nloc and n parameters.... + if (nloc < 1) then + info = 10 + int_err(1) = 1 + int_err(2) = nloc + else if (n < 1) then + info = 10 + int_err(1) = 2 + int_err(2) = n + endif + + if (info /= 0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (debug) write(*,*) 'psb_dscall: doing global checks' + !global check on m and n parameters + + allocate(nlv(0:nprow-1)) + nlv(:) = 0 + nlv(me) = nloc + + call igsum2d(icontxt,'All',' ',nprow,1,nlv,nprow,-1,-1) + m = sum(nlv) + + + + call psb_nullify_desc(desc_a) + + + !count local rows number + ! allocate work vector + allocate(desc_a%glob_to_loc(m),desc_a%matrix_data(mdata_size),& + & desc_a%loc_to_glob(nloc),desc_a%lprm(1),& + & desc_a%ovrlap_index(1),desc_a%ovrlap_elem(1),& + & desc_a%halo_index(1),desc_a%bnd_elem(1),stat=info) + if (info /= 0) then + info=2025 + int_err(1)=m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + + + j = 1 + do ip=0, nprow-1 + if (ip==me) then + do i=1, nlv(ip) + desc_a%glob_to_loc(j) = i + desc_a%loc_to_glob(i) = j + j = j + 1 + enddo + else + do i=1, nlv(ip) + desc_a%glob_to_loc(j) = -(nprow+ip+1) + j = j + 1 + enddo + endif + enddo + + + + desc_a%lprm(:) = 0 + desc_a%halo_index(:) = -1 + desc_a%bnd_elem(:) = -1 + desc_a%ovrlap_index(:) = -1 + desc_a%ovrlap_elem(:) = -1 + + + desc_a%matrix_data(m_) = m + desc_a%matrix_data(n_) = m + desc_a%matrix_data(psb_n_row_) = nloc + desc_a%matrix_data(psb_n_col_) = nloc + desc_a%matrix_data(psb_dec_type_) = desc_asb + desc_a%matrix_data(psb_ctxt_) = icontxt + call blacs_get(icontxt,10,desc_a%matrix_data(mpi_c_)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscdec diff --git a/src/tools/psb_dscfree.f90 b/src/tools/psb_dscfree.f90 new file mode 100644 index 00000000..ce695675 --- /dev/null +++ b/src/tools/psb_dscfree.f90 @@ -0,0 +1,154 @@ +! File: psb_dscfree.f90 +! +! Subroutine: psb_dscfree +! Frees a descriptor data structure. +! +! Parameters: +! desc_a - type(). The communication descriptor to be freed. +! info - integer. Eventually returns an error code. +subroutine psb_dscfree(desc_a,info) + !...free descriptor structure... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + !....parameters... + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + !...locals.... + integer :: int_err(5) + integer :: temp(1) + real(kind(1.d0)) :: real_err(5) + integer :: icontxt,nprow,npcol,me,mypcol, err_act + integer,parameter :: ione=1 + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dscfree' + + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + deallocate(desc_a%matrix_data) + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !...deallocate desc_a.... + if(.not.associated(desc_a%loc_to_glob)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate loc_to_glob field + deallocate(desc_a%loc_to_glob,stat=info) + if (info /= 0) then + info=2051 + call psb_errpush(info,name) + goto 9999 + end if + + if (.not.associated(desc_a%glob_to_loc)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate glob_to_loc field + deallocate(desc_a%glob_to_loc,stat=info) + if (info /= 0) then + info=2052 + call psb_errpush(info,name) + goto 9999 + end if + + if (.not.associated(desc_a%halo_index)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate halo_index field + deallocate(desc_a%halo_index,stat=info) + if (info /= 0) then + info=2053 + call psb_errpush(info,name) + goto 9999 + end if + + if (.not.associated(desc_a%bnd_elem)) then + info=296 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate halo_index field + deallocate(desc_a%bnd_elem,stat=info) + if (info /= 0) then + info=2054 + call psb_errpush(info,name) + goto 9999 + end if + + if (.not.associated(desc_a%ovrlap_index)) then + info=295 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate ovrlap_index field + deallocate(desc_a%ovrlap_index,stat=info) + if (info /= 0) then + info=2055 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate ovrlap_elem field + deallocate(desc_a%ovrlap_elem,stat=info) + if (info /= 0) then + info=2056 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate ovrlap_index field + deallocate(desc_a%lprm,stat=info) + if (info /= 0) then + info=2057 + call psb_errpush(info,name) + goto 9999 + end if + + call psb_nullify_desc(desc_a) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dscfree diff --git a/src/tools/psb_dscins.f90 b/src/tools/psb_dscins.f90 new file mode 100644 index 00000000..711b02c7 --- /dev/null +++ b/src/tools/psb_dscins.f90 @@ -0,0 +1,161 @@ +! File: psb_dscins.f90 +! +! Subroutine: psb_dscins +! Takes as input a cloud of points and updates the descriptor accordingly. +! +! Parameters: +! nz - integer. The number of points to insert. +! ia - integer,dimension(:). The row indices of the points. +! ja - integer,dimension(:). The column indices of the points. +! desc_a - type(). The communication descriptor to be freed. +! info - integer. Eventually returns an error code. +! is - integer(optional). The row offset. +! js - integer(optional). The column offset. +subroutine psb_dscins(nz,ia,ja,desc_a,info,is,js) + + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + + !....PARAMETERS... + Type(psb_desc_type), intent(inout) :: desc_a + Integer, intent(in) :: nz,IA(:),JA(:) + integer, intent(out) :: info + integer, intent(in), optional :: is,js + + !LOCALS..... + + integer :: i,icontxt,nprocs ,glob_row,row,k,start_row,end_row,& + & first_loc_row,j, ierror,locix,locjx,& + & dectype,mglob, nnza, nglob,err + integer,pointer :: tia1(:),tia2(:), temp(:) + integer :: nprow,npcol, me ,mypcol, iflag, isize, irlc + integer :: m,n, pnt_halo,nrow,ncol, nh, ip,jp, err_act + logical, parameter :: debug=.false. + integer, parameter :: relocsz=200 + character(len=20) :: name,ch_err + + info = 0 + name = 'psb_dscins' + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + mglob = desc_a%matrix_data(m_) + nglob = desc_a%matrix_data(n_) + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (npcol.ne.1) then + info = 2030 + call psb_errpush(info,name) + goto 9999 + endif + if (.not.is_bld_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + if (nz <= 0) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + if (size(ia) < nz) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + + if (size(ja) < nz) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + + + if (.not.associated(desc_a%halo_index)) then + allocate(desc_a%halo_index(relocsz)) + desc_a%halo_index(:) = -1 + endif + pnt_halo=1 + do while (desc_a%halo_index(pnt_halo) .ne. -1 ) + pnt_halo = pnt_halo + 1 + end do + isize = size(desc_a%halo_index) + + do i = 1, nz + ip = ia(i) + jp = ja(i) + if ((ip < 1 ).or.(ip>mglob).or.(jp<1).or.(jp>mglob)) then +! write(0,*) 'wrong input ',i,ip,jp + info = 1133 + call psb_errpush(info,name) + goto 9999 + endif + if ((1<=desc_a%glob_to_loc(ip)).and.(desc_a%glob_to_loc(ip))<=nrow) then + k = desc_a%glob_to_loc(jp) + if (k.lt.-nprow) then + k = k + nprow + k = - k - 1 + ncol = ncol + 1 + desc_a%glob_to_loc(jp) = ncol + isize = size(desc_a%loc_to_glob) + if (ncol > isize) then + nh = ncol + max(nz,relocsz) + call psb_realloc(nh,desc_a%loc_to_glob,info,pad=-1) + if (me==0) then + if (debug) write(0,*) 'done realloc ',nh + end if + if (info /= 0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name) + goto 9999 + end if + isize = nh + endif + desc_a%loc_to_glob(ncol) = jp + isize = size(desc_a%halo_index) + if ((pnt_halo+3).gt.isize) then + nh = isize + max(nz,relocsz) + call psb_realloc(nh,desc_a%halo_index,info,pad=-1) + if (info /= 0) then + info=4010 + ch_err='psb_realloc' + call psb_errpush(info,name) + goto 9999 + end if + isize = nh + endif + desc_a%halo_index(pnt_halo) = k + desc_a%halo_index(pnt_halo+1) = 1 + desc_a%halo_index(pnt_halo+2) = ncol + pnt_halo = pnt_halo + 3 + endif + else + ! currently we ignore items not belonging to us. + endif + enddo + desc_a%matrix_data(psb_n_col_) = ncol + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dscins + diff --git a/src/tools/psb_dscov.f90 b/src/tools/psb_dscov.f90 new file mode 100644 index 00000000..30f572d3 --- /dev/null +++ b/src/tools/psb_dscov.f90 @@ -0,0 +1,206 @@ +! File: psb_dscov.f90 +! +! Subroutine: psb_dscov +! This routine takes a matrix A with its descriptor, and builds the +! auxiliary descriptor corresponding to the number of overlap levels +! specified on input. It really is just a size estimation/allocation +! front end for . +! +! Parameters: +! a - type(). The input sparse matrix. +! desc_a - type(). The input communication descriptor. +! norv - integer. The number of overlap levels. +! desc_ov - type(). The auxiliary output communication descriptor. +! info - integer. Eventually returns an error code. +! +Subroutine psb_dscov(a,desc_a,novr,desc_ov,info) + + use psb_serial_mod + use psb_descriptor_type + Use psb_prec_type + Use psb_prec_mod + use psb_error_mod + Implicit None + + ! .. Array Arguments .. + integer, intent(in) :: novr + Type(psb_dspmat_type), Intent(in) :: a + Type(psb_desc_type), Intent(in) :: desc_a + Type(psb_desc_type), Intent(inout) :: desc_ov + integer, intent(out) :: info + + real(kind(1.d0)) :: t1,t2,t3,mpi_wtime + external mpi_wtime + integer idscb,idsce,iovrb,iovre, ierr, irank, icomm, err_act +!!$ integer mpe_log_get_event_number,mpe_Describe_state,mpe_log_event + + interface psb_psdsccpy + subroutine psb_dsccpy(desc_out,desc_a,info) + use psb_descriptor_type + type(psb_desc_type), intent(out) :: desc_out + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + end subroutine psb_dsccpy + end interface + + interface psb_descasb + subroutine psb_descasb(n_ovr,desc_p,desc_a,a,l_tmp_halo,& + & l_tmp_ovr_idx,lworks,lworkr,info) + use psb_prec_type + use psb_spmat_type + type(psb_dspmat_type),intent(in) :: a + type(psb_desc_type),intent(in) :: desc_a + type(psb_desc_type),intent(inout) :: desc_p + integer,intent(in) :: n_ovr + integer, intent(in) :: l_tmp_halo,l_tmp_ovr_idx + integer, intent(inout) :: lworks, lworkr + integer, intent(out) :: info + end subroutine psb_descasb + end interface + + + + ! .. Local Scalars .. + Integer :: i, j, k, nprow,npcol, me, mycol,m,nnzero,& + & icontxt, lovr, lelem,lworks,lworkr, n_col, int_err(5),& + & n_row,index_dim,elem_dim, l_tmp_ovr_idx,l_tmp_halo, nztot,nhalo + Logical,Parameter :: debug=.false. + character(len=20) :: name, ch_err + + name='psb_dscov' + info = 0 + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + Call blacs_gridinfo(icontxt,nprow,npcol,me,mycol) + + If(debug) Write(0,*)'in psb_dscov',novr + + m=desc_a%matrix_data(psb_n_row_) + nnzero=Size(a%aspk) + n_col=desc_a%matrix_data(psb_n_col_) + nhalo = n_col-m + If(debug) Write(0,*)'IN DSCOV1',novr ,m,nnzero,n_col + if (novr<0) then + info=10 + int_err(1)=1 + int_err(2)=novr + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (novr==0) then + ! + ! Just copy the input. + ! + if (debug) write(0,*) 'Calling desccpy' + call psb_dsccpy(desc_ov,desc_a,info) + if (info.ne.0) then + info=4010 + ch_err='psb_dsccpy' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*) 'From desccpy' + return + endif + + call blacs_get(icontxt,10,icomm ) +!!$ call MPI_Comm_rank(icomm,irank,ierr) +!!$ idscb = mpe_log_get_event_number() +!!$ idsce = mpe_log_get_event_number() +!!$ iovrb = mpe_log_get_event_number() +!!$ iovre = mpe_log_get_event_number() +!!$ if (irank==0) then +!!$ info = mpe_describe_state(idscb,idsce,"DSCASB ","NavyBlue") +!!$ info = mpe_describe_state(iovrb,iovre,"DSCOVR ","DeepPink") +!!$ endif + If(debug)Write(0,*)'BEGIN dscov',me,nhalo +!!$ call blacs_barrier(icontxt,'All') + t1 = mpi_wtime() + + + +!!$ ierr = MPE_Log_event( idscb, 0, "st DSCASB" ) + ! + ! Ok, since we are only estimating, do it as follows: + ! LOVR= (NNZ/NROW)*N_HALO*N_OVR This assumes that the local average + ! nonzeros per row is the same as the global. + ! + call psb_spinfo(nztotreq,a,nztot,info) + if (info.ne.0) then + info=4010 + ch_err='spinfo' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (nztot>0) then + lovr = ((nztot+m-1)/m)*nhalo*novr + lworks = ((nztot+m-1)/m)*nhalo + lworkr = ((nztot+m-1)/m)*nhalo + else + info=-1 + call psb_errpush(info,name) + goto 9999 + endif + If(debug)Write(0,*)'ovr_est done',me,novr,lovr + index_dim = size(desc_a%halo_index) + elem_dim = size(desc_a%halo_index) + + allocate(desc_ov%ovrlap_index(novr*(Max(2*index_dim,1)+1)),& + & desc_ov%ovrlap_elem(novr*(Max(elem_dim,1)+3)),& + & desc_ov%matrix_data(10),& + & desc_ov%halo_index(novr*(Size(desc_a%halo_index)+3)),STAT=INFO) + if (info.ne.0) then + info=4000 + call psb_errpush(info,name) + goto 9999 + end if + + l_tmp_ovr_idx=novr*(3*Max(2*index_dim,1)+1) + l_tmp_halo=novr*(3*Size(desc_a%halo_index)) + + desc_ov%ovrlap_index(:) = -1 + desc_ov%ovrlap_elem(:) = -1 + desc_ov%halo_index(:) = -1 + desc_ov%matrix_data(1:10) = desc_a%matrix_data(1:10) + desc_ov%matrix_data(psb_dec_type_) = desc_bld + + Allocate(desc_ov%loc_to_glob(Size(desc_a%loc_to_glob)),& + & desc_ov%glob_to_loc(Size(desc_a%glob_to_loc))) + + desc_ov%loc_to_glob(:) = desc_a%loc_to_glob(:) + desc_ov%glob_to_loc(:) = desc_a%glob_to_loc(:) + If(debug)Write(0,*)'Start descasb',me,lworks,lworkr + call blacs_barrier(icontxt,'All') + + ! + ! The real work goes on in here.... + ! + Call psb_descasb(novr,desc_ov,desc_a,a,& + & l_tmp_halo,l_tmp_ovr_idx,lworks,lworkr,info) + if (info.ne.0) then + info=4010 + ch_err='psb_descasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + desc_ov%matrix_data(psb_dec_type_) = desc_asb + If(debug)Write(0,*)'Done descasb',me,lworks,lworkr + call blacs_barrier(icontxt,'All') +!!$ ierr = MPE_Log_event( idsce, 0, "st DSCASB" ) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + Return + +End Subroutine psb_dscov + diff --git a/src/tools/psb_dscren.f90 b/src/tools/psb_dscren.f90 new file mode 100644 index 00000000..446a0433 --- /dev/null +++ b/src/tools/psb_dscren.f90 @@ -0,0 +1,205 @@ +! File: psb_dscren.f90 +! +! Subroutine: psb_dscren +! Updates a communication descriptor according to a renumbering scheme. +! +! Parameters: +! trans - character. Whether iperm or its transpose should be applied. +! iperm - integer,dimension(:). The renumbering scheme. +! desc_a - type(). The communication descriptor to be updated. +! info - integer. Eventually returns an error code. +! +subroutine psb_dscren(trans,iperm,desc_a,info) + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + interface isaperm + logical function isaperm(n,ip) + integer, intent(in) :: n + integer, intent(inout) :: ip(*) + end function isaperm + end interface + + !...parameters.... + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(inout) :: iperm(:) + character, intent(in) :: trans + integer, intent(out) :: info + !....locals.... + integer :: i,j,err,nprow,npcol,me,mypcol, n_col, kh, nh + integer :: dectype + integer :: icontxt,temp(1),n_row, int_err(5), err_act + integer, parameter :: ione=1 + real(kind(1.d0)) :: time(10), mpi_wtime, real_err(6) + external mpi_wtime + logical, parameter :: debug=.false. + character(len=20) :: name, char_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dcren' + + time(1) = mpi_wtime() + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.is_asb_dec(dectype)) then + info = 600 + int_err(1) = dectype + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (iperm(1) /= 0) then + if (.not.isaperm(n_row,iperm)) then + info = 610 + int_err(1) = iperm(1) + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + if (debug) write (*, *) ' begin matrix assembly...' + + !check on errors encountered in psdspins + + if ((iperm(1) /= 0)) then + + if (debug) write(0,*) 'spasb: here we go with ',iperm(1) + deallocate(desc_a%lprm) + allocate(desc_a%lprm(n_col)) + if (trans.eq.'n') then + do i=1, n_row + desc_a%lprm(iperm(i)) = i + enddo + do i=n_row+1,n_col + desc_a%lprm(i) = i + enddo + else if (trans.eq.'t') then + do i=1, n_row + desc_a%lprm(i) = iperm(i) + enddo + do i=n_row+1,n_col + desc_a%lprm(i) = i + enddo + endif + ! crossed fingers..... + ! fix glob_to_loc/loc_to_glob mappings, then indices lists + ! hmm, maybe we should just moe all of this onto a different level, + ! have a specialized subroutine, and do it in the solver context???? + if (debug) write(0,*) 'spasb: renumbering glob_to_loc' + do i=1, n_col + desc_a%glob_to_loc(desc_a%loc_to_glob(desc_a%lprm(i))) = i + enddo + if (debug) write(0,*) 'spasb: renumbering loc_to_glob' + do i=1,desc_a%matrix_data(m_) + j = desc_a%glob_to_loc(i) + if (j>0) then + desc_a%loc_to_glob(j) = i + endif + enddo + if (debug) write(0,*) 'spasb: renumbering halo_index' + i=1 + kh=desc_a%halo_index(i) + do while (kh /= -1) + i = i+1 + nh = desc_a%halo_index(i) + do j = i+1, i+nh + desc_a%halo_index(j) = & + &desc_a%lprm(desc_a%halo_index(j)) + enddo + i = i + nh + 1 + nh = desc_a%halo_index(i) + do j= i+1, i+nh + desc_a%halo_index(j) = & + &desc_a%lprm(desc_a%halo_index(j)) + enddo + i = i + nh + 1 + kh=desc_a%halo_index(i) + enddo + if (debug) write(0,*) 'spasb: renumbering ovrlap_index' + i=1 + kh=desc_a%ovrlap_index(i) + do while (kh /= -1) + i = i + 1 + nh = desc_a%ovrlap_index(i) + do j= i+1, i+nh + desc_a%ovrlap_index(j) = & + &desc_a%lprm(desc_a%ovrlap_index(j)) + enddo + i = i + nh + 1 + kh=desc_a%ovrlap_index(i) + enddo + if (debug) write(0,*) 'spasb: renumbering ovrlap_elem' + i = 1 + kh=desc_a%ovrlap_elem(i) + do while (kh /= -1) + desc_a%ovrlap_elem(i) = & + &desc_a%lprm(desc_a%ovrlap_elem(i)) + i = i+2 + kh=desc_a%ovrlap_elem(i) + enddo + if (debug) write(0,*) 'spasb: done renumbering' + if (debug) then + write(60+me,*) 'n_row ',n_row,' n_col',n_col, ' trans: ',trans + do i=1,n_col + write(60+me,*)i, ' lprm ', desc_a%lprm(i), ' iperm',iperm(i) + enddo + i=1 + kh = desc_a%halo_index(i) + do while (kh /= -1) + write(60+me,*) i, kh + i = i+1 + kh = desc_a%halo_index(i) + enddo + close(60+me) + end if + +!!$ iperm(1) = 0 + else +!!$ allocate(desc_a%lprm(1)) +!!$ desc_a%lprm(1) = 0 + endif + + + time(4) = mpi_wtime() + time(4) = time(4) - time(3) + if (debug) then + call dgamx2d(icontxt, all, topdef, ione, ione, time(4),& + & ione,temp ,temp,-ione ,-ione,-ione) + + write (*, *) ' comm structs assembly: ', time(4)*1.d-3 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dscren diff --git a/src/tools/psb_dscrep.f90 b/src/tools/psb_dscrep.f90 new file mode 100644 index 00000000..7120faf3 --- /dev/null +++ b/src/tools/psb_dscrep.f90 @@ -0,0 +1,205 @@ +! --------------------------------------------------------------------- +! +! -- PSSBLAS routine (version 1.0) -- +! +! --------------------------------------------------------------------- +!/ +subroutine psb_dscrep(m, icontxt, desc_a, info) + + ! Purpose + ! ======= + ! + ! Allocate special descriptor for replicated index space. + ! + ! + ! + ! INPUT + !====== + ! M :(Global Input) Integer + ! Total number of equations + ! required. + ! + ! ICONTXT : (Global Input)Integer BLACS context for an NPx1 grid + ! required. + ! + ! OUTPUT + !========= + ! desc_a : TYPEDESC + ! desc_a OUTPUT FIELDS: + ! + ! MATRIX_DATA : Pointer to integer Array + ! contains some + ! local and global information about matrix: + ! + ! NOTATION STORED IN EXPLANATION + ! ------------ ---------------------- ------------------------------------- + ! DEC_TYPE MATRIX_DATA[DEC_TYPE_] Decomposition type, temporarly is + ! setted to 1( matrix not yet assembled) + ! M MATRIX_DATA[M_] Total number of equations + ! N MATRIX_DATA[N_] Total number of variables + ! N_ROW MATRIX_DATA[N_ROW_] Number of local equations + ! N_COL MATRIX_DATA[N_COL_] Number of local columns (see below) + ! CTXT_A MATRIX_DATA[CTXT_] The BLACS context handle, + ! indicating + ! the global context of the operation + ! on the matrix. + ! The context itself is global. + ! + ! GLOB_TO_LOC Array of dimension equal to number of global + ! rows/cols (MATRIX_DATA[M_]). On exit, + ! for all global indices either: + ! 1. The index belongs to the current process; the entry + ! is set to the next free local row index. + ! 2. The index belongs to process P (0<=P<=NP-1); the entry + ! is set to + ! -(NP+P+1) + ! + ! LOC_TO_GLOB An array of dimension equal to number of local cols N_COL + ! i.e. all columns of the matrix such that there is at least + ! one nonzero entry within the local row range. At the time + ! this routine is called N_COL cannot be know, so we set + ! N_COL=N_ROW, and dimension this vector on N_ROW plus an + ! estimate. On exit the vector elements are set + ! to the index of the corresponding entry in GLOB_TO_LOC, or + ! to -1 for indices I>N_ROW. + ! + ! + ! HALO_INDEX Not touched here, as it depends on the matrix pattern + ! + ! OVRLAP_INDEX On exit from this routine, the overlap indices are stored in + ! triples (Proc, 1, Index), similar to the assembled format + ! but neither optimized, nor deadlock free. + ! List is terminated with -1 + ! + ! OVRLAP_ELEM On exit from this routine, just a list of pairs (index,#p). + ! List is terminated with -1. + ! + ! + ! END OF desc_a OUTPUT FIELDS + ! + ! + + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit None + !....Parameters... + Integer, intent(in) :: m,icontxt + integer, intent(out) :: info + Type(psb_desc_type), intent(out) :: desc_a + + !locals + Integer :: counter,i,j,nprow,npcol,me,mypcol,& + & loc_row,err,loc_col,nprocs,n,itmpov, k,& + & l_ov_ix,l_ov_el,idx, flag_, err_act + Integer :: INT_ERR(5),TEMP(1),EXCH(2) + Real(Kind(1.d0)) :: REAL_ERR(5) + Integer, Parameter :: IONE=1, ITWO=2,ROOT=0 + Integer, Pointer :: temp_ovrlap(:), ov_idx(:),ov_el(:) + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + err=0 + name = 'psb_dscrep' + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (debug) write(*,*) 'psb_dscall: ',nprow,npcol,me,mypcol + ! ....verify blacs grid correctness.. + if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + n = m + !... check m and n parameters.... + if (m < 1) then + info = 10 + int_err(1) = 1 + int_err(2) = m + else if (n < 1) then + info = 10 + int_err(1) = 2 + int_err(2) = n + endif + + if (info /= 0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (debug) write(*,*) 'psb_dscall: doing global checks' + !global check on m and n parameters + if (me.eq.root) then + exch(1)=m + exch(2)=n + call igebs2d(icontxt,all,topdef, itwo,ione, exch, itwo) + else + call igebr2d(icontxt,all,topdef, itwo,ione, exch, itwo, root,& + & 0) + if (exch(1) /= m) then + info=550 + int_err(1)=1 + else if (exch(2) /= n) then + info=550 + int_err(1)=2 + endif + endif + + if (info /= 0) then + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + call psb_nullify_desc(desc_a) + + + !count local rows number + ! allocate work vector + allocate(desc_a%glob_to_loc(m),desc_a%matrix_data(mdata_size),& + & desc_a%loc_to_glob(m),desc_a%lprm(1),& + & desc_a%ovrlap_index(1),desc_a%ovrlap_elem(1),& + & desc_a%halo_index(1),desc_a%bnd_elem(1),stat=info) + if (info /= 0) then + info=2025 + int_err(1)=m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + do i=1,m + desc_a%glob_to_loc(i) = i + desc_a%loc_to_glob(i) = i + enddo + + desc_a%lprm(:) = 0 + desc_a%halo_index(:) = -1 + desc_a%bnd_elem(:) = -1 + desc_a%ovrlap_index(:) = -1 + desc_a%ovrlap_elem(:) = -1 + + + desc_a%matrix_data(m_) = m + desc_a%matrix_data(n_) = n + desc_a%matrix_data(psb_n_row_) = m + desc_a%matrix_data(psb_n_col_) = n + desc_a%matrix_data(psb_dec_type_) = desc_repl + desc_a%matrix_data(psb_ctxt_) = icontxt + call blacs_get(icontxt,10,desc_a%matrix_data(mpi_c_)) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dscrep diff --git a/src/tools/psb_dspalloc.f90 b/src/tools/psb_dspalloc.f90 new file mode 100644 index 00000000..b551b236 --- /dev/null +++ b/src/tools/psb_dspalloc.f90 @@ -0,0 +1,122 @@ +! File: psb_dspalloc.f90 +! +! Subroutine: psb_dspalloc +! Allocate sparse matrix structure for psblas routines. +! +! Parameters: +! a - type(). The sparse matrix to be allocated. +! desc_a - type(). The communication descriptor to be updated. +! info - integer. Eventually returns an error code. +! nnz - integer(optional). The number of nonzeroes in the matrix. +! +subroutine psb_dspalloc(a, desc_a, info, nnz) + + use psb_descriptor_type + use psb_dspmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(out) :: a + integer, intent(out) :: info + integer, optional, intent(in) :: nnz + + !locals + integer :: icontxt, dectype + integer :: nprow,npcol,me,mypcol,loc_row,& + & length_ia1,length_ia2,err,nprocs, err_act,m,n + integer :: int_err(5),temp(1) + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1, itwo=2,root=0 + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_dspalloc' + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) +! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + ! + ! hmm, not a good idea, not all compilers can rely on any given + ! value for non initialized pointers. let's avoid this, + ! and just rely on documentation. + ! check if psdalloc is already called for this matrix + + ! set fields in desc_a%matrix_data.... + loc_row = desc_a%matrix_data(psb_n_row_) + m = desc_a%matrix_data(m_) + n = desc_a%matrix_data(n_) + + !...allocate matrix data... + if (present(nnz))then + if (nnz.lt.0) then + info=45 + int_err(1)=7 + int_err(2)=nnz + call psb_errpush(info,name,int_err) + goto 9999 + endif + length_ia1=nnz + length_ia2=nnz + else + length_ia1=max(1,4*loc_row) + length_ia2=max(1,4*loc_row) + endif + + if (debug) write(*,*) 'allocating size:',length_ia1 + + !....allocate aspk, ia1, ia2..... + call psb_spall(loc_row,loc_row,a,length_ia1,info) + if(info.ne.0) then + info=4010 + ch_err='spreall' + call psb_errpush(info,name,int_err) + goto 9999 + end if + + ! set permutation matrices + a%pl(1)=0 + a%pr(1)=0 + ! set infoa fields + a%fida = 'COO' + a%descra = 'GUN' + a%infoa(nnz_) = 0 + a%infoa(srtd_) = 0 + a%infoa(state_) = spmat_bld + + if (debug) write(0,*) 'spall: ', & + &desc_a%matrix_data(psb_dec_type_),desc_bld + desc_a%matrix_data(psb_dec_type_) = desc_bld + return + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dspalloc diff --git a/src/tools/psb_dspasb.f90 b/src/tools/psb_dspasb.f90 new file mode 100644 index 00000000..e06eedd6 --- /dev/null +++ b/src/tools/psb_dspasb.f90 @@ -0,0 +1,258 @@ +! File: psb_dspasb.f90 +! +! Subroutine: psb_dspasb +! Assembly sparse matrix and set psblas communications +! structures. +! +! Parameters: +! a - type(). The sparse matrix to be allocated. +! desc_a - type(). The communication descriptor to be updated. +! info - integer. Eventually returns an error code. +! afmt - character,dimension(5)(optional). The output format. +! up - character(optional). ??? +! dup - integer(optional). ??? +! +subroutine psb_dspasb(a,desc_a, info, afmt, up, dup) + + use psb_descriptor_type + use psb_dspmat_type + use psb_serial_mod + use psb_const_mod + use psi_mod + use psb_error_mod + + implicit none + + interface psb_cest + subroutine psb_cest(afmt, nnz, lia1, lia2, lar, up, info) + integer, intent(in) :: nnz + integer, intent(out) :: lia1, lia2, lar, info + character, intent(in) :: afmt*5, up + end subroutine psb_cest + end interface + + !...Parameters.... + type(psb_dspmat_type), intent (inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + integer,optional, intent(in) :: dup + character, optional, intent(in) :: afmt*5, up + !....Locals.... + integer :: int_err(5) + type(psb_dspmat_type) :: atemp + real(kind(1.d0)) :: real_err(5) + integer :: ia1_size,ia2_size,aspk_size,m,i,err,& + & nprow,npcol,me,mypcol ,size_req,idup,n_col,iout, err_act + integer :: dscstate, spstate, nr,k,j, iupdup + integer :: icontxt,temp(2),isize(2),n_row + character :: iup + integer, parameter :: ione=1 + logical, parameter :: debug=.false., debugwrt=.false. + character(len=20) :: name, ch_err + + info = 0 + int_err(1)=0 + name = 'psb_spasb' + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + dscstate = desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! check on BLACS grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol /= 1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name) + goto 9999 + endif + + if (.not.is_asb_dec(dscstate)) then + info = 600 + int_err(1) = dscstate + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) Write (*, *) ' Begin matrix assembly...' + + !check on errors encountered in psdspins + + spstate = a%infoa(state_) + if (spstate == SPMAT_BLD) then + ! + ! First case: we come from a fresh build. + ! + + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! + ! Second step: handle the local matrix part. + ! + iupdup = 0 + if (present(up)) then + if(up.eq.'Y') then + iupdup = 4 + iup = up + else if (up /= 'N') then + write(0,*)'Wrong value for update input in ASB...' + write(0,*)'Changing to default' + iup = 'N' + else + iup = 'N' + endif + else + iup = 'N' + endif + + if (present(dup)) then + if((dup.lt.1).or.(dup.gt.3)) then + write(0,*)'Wrong value for duplicate input in ASB...' + write(0,*)'Changing to default' + idup = 1 + else + idup = dup + endif + else + idup = 1 + endif + iupdup = ieor(iupdup,idup) + + + a%infoa(upd_)=iupdup + if (debug) write(0,*)'in ASB',upd_,iupdup + + a%m = n_row + a%k = n_col + + call psb_spclone(a,atemp,info) + if(info /= no_err) then + info=4010 + ch_err='psb_spclone' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + ! convert to user requested format after the temp copy + end if + if (present(afmt)) then + a%fida = afmt + else + a%fida = '???' + endif + + ! + ! work area requested must be fixed to + ! No of Grid'd processes and NNZ+2 + ! + size_req = max(a%infoa(nnz_),1)+3 + if (debug) write(0,*) 'DCSDP : size_req 1:',size_req + call psb_cest(a%fida, size_req, ia1_size, ia2_size, aspk_size, iup,info) + if (info /= no_err) then + info=4010 + ch_err='psb_cest' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_spreall(a,ia1_size,ia2_size,aspk_size,info) + if (info /= no_err) then + info=4010 + ch_err='psb_spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + a%pl(:) = 0 + a%pr(:) = 0 + + if (debugwrt) then + iout = 30+me + open(iout) + call psb_csprt(iout,atemp,head='Input mat') + close(iout) + endif + + ! Do the real conversion into the requested storage formatmode + ! result is put in A + call psb_csdp90(atemp,a,info,ifc=2) + + IF (debug) WRITE (*, *) me,' ASB: From DCSDP',info,' ',A%FIDA + if (info /= no_err) then + info=4010 + ch_err='psb_csdp90' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + if (debugwrt) then + iout = 60+me + open(iout) + call csprt(iout,a,head='Output mat') + close(iout) + endif + + + else if (spstate == SPMAT_UPD) then + ! + ! Second case: we come from an update loop. + ! + + + ! Right now, almost nothing to be done, but this + ! may change in the future + ! as we revise the implementation of the update routine. + call psb_spall(atemp,1,info) + atemp%m=a%m + atemp%k=a%k + ! check on allocation + if (info /= no_err) then + info=4010 + ch_err='psb_spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + call psb_csdp90(atemp,a,info,check='R') + ! check on error retuned by dcsdp + if (info /= no_err) then + info = 4010 + ch_err='psb_csdp90' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_spfree(atemp,info) + if (info /= no_err) then + info = 4010 + ch_err='spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + else + + info = 600 + call psb_errpush(info,name) + goto 9999 + if (debug) write(0,*) 'Sparse matrix state:',spstate,spmat_bld,spmat_upd + + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dspasb diff --git a/src/tools/psb_dspcnv.f90 b/src/tools/psb_dspcnv.f90 new file mode 100644 index 00000000..d280bfb5 --- /dev/null +++ b/src/tools/psb_dspcnv.f90 @@ -0,0 +1,224 @@ +! File: psb_dspcnv.f90 +! +! Subroutine: psb_dspcnv +! converts sparse matrix a into b +! +! Parameters: +! a - type(). The sparse input matrix. +! b - type(). The sparse output matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_dspcnv(a,b,desc_a,info) + + use psb_descriptor_type + use psb_spmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + interface dcsdp + + subroutine dcsdp(check,trans,m,n,unitd,d,& + & fida,descra,a,ia1,ia2,infoa,& + & pl,fidh,descrh,h,ih1,ih2,infoh,pr,lh,lh1,lh2,& + & work,lwork,ierror) + integer, intent(in) :: lh, lwork, lh1, lh2, m, n + integer, intent(out) :: ierror + character, intent(in) :: check, trans, unitd + real(kind(1.d0)), intent(in) :: d(*), a(*) + real(kind(1.d0)), intent(out) :: h(*) + real(kind(1.d0)), intent(inout) :: work(*) + integer, intent(in) :: ia1(*), ia2(*), infoa(*) + integer, intent(out) :: ih1(*), ih2(*), pl(*),pr(*), infoh(*) + character, intent(in) :: fida*5, descra*11 + character, intent(out) :: fidh*5, descrh*11 + end subroutine dcsdp + end interface + + + interface dcsrp + + subroutine dcsrp(trans,m,n,fida,descra,ia1,ia2,& + & infoa,p,work,lwork,ierror) + integer, intent(in) :: m, n, lwork + integer, intent(out) :: ierror + character, intent(in) :: trans + real(kind(1.d0)), intent(inout) :: work(*) + integer, intent(in) :: p(*) + integer, intent(inout) :: ia1(*), ia2(*), infoa(*) + character, intent(in) :: fida*5, descra*11 + end subroutine dcsrp + end interface + + interface dcsprt + subroutine dcsprt(m,n,fida,descra,a,ia1,ia2,infoa ,iout,ierror) + integer, intent(in) :: iout,m, n + integer, intent(out) :: ierror + real(kind(1.d0)), intent(in) :: a(*) + integer, intent(in) :: ia1(*), ia2(*), infoa(*) + character, intent(in) :: fida*5, descra*11 + end subroutine dcsprt + end interface + + !...parameters.... + type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(out) :: b + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + !....locals.... + integer :: int_err(5) + integer,pointer :: ovrlap_elem(:),ovrlap_index(:) + real(kind(1.d0)) :: d(1) + integer,pointer :: i_temp(:) + real(kind(1.d0)),pointer :: work_dcsdp(:) + integer :: ia1_size,ia2_size,aspk_size,err_act& + & ,i,err,nprow,npcol,me,mypcol,n_col,l_dcsdp, iout, nrow + integer :: lwork_dcsdp,dectype + integer :: icontxt,temp(1),n_row + character :: check*1, trans*1, unitd*1 + integer, parameter :: ione=1 + + real(kind(1.d0)) :: time(10), mpi_wtime + external mpi_wtime + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + name = 'psb_dspcnv' + call psb_erractionsave(err_act) + + time(1) = mpi_wtime() + + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (.not.is_ok_dec((dectype))) then + info = 600 + int_err(1) = dectype + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (debug) write (0, *) name,' begin matrix assembly...' + + ia1_size = size(a%ia1) + ia2_size = size(a%ia2) + aspk_size = size(a%aspk) + + if (debug) write (0, *) name,' sizes',ia1_size,ia2_size,aspk_size + + ! convert only without check + check='N' + trans='N' + unitd='U' + + ! l_dcsdp is the size requested for dcsdp procedure + l_dcsdp=(ia1_size+100) + + b%m=nrow + b%k=n_col + call psb_spall(b,ia1_size,ia2_size,aspk_size,info) + allocate(work_dcsdp(l_dcsdp),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=l_dcsdp + call psb_errpush(info, name, i_err=int_err) + goto 9999 + endif + + lwork_dcsdp=size(work_dcsdp) + ! set infoa(1) to nnzero + b%pl(:) = 0 + b%pr(:) = 0 + + if (debug) write (0, *) name,' calling dcsdp',lwork_dcsdp,& + &size(work_dcsdp) + ! convert aspk,ia1,ia2 in requested representation mode + if (debug) then + + endif + ! result is put in b + call dcsdp(check,trans,n_row,n_col,unitd,d,a%fida,a%descra,& + & a%aspk,a%ia1,a%ia2,a%infoa,& + & b%pl,b%fida,b%descra,b%aspk,b%ia1,b%ia2,b%infoa,b%pr,& + & size(b%aspk),size(b%ia1),size(b%ia2),& + & work_dcsdp,size(work_dcsdp),info) + + if(info.ne.no_err) then + info=4010 + ch_err='spclone' + call psb_errpush(info, name, a_err=ch_err) + goto 9999 + end if + + ! + ! hmmm, have to fix b%pl and b%pr according to a%pl and a%pr!!! + ! should work (crossed fingers :-) + if (a%pr(1).ne.0) then + if (b%pr(1).ne.0) then + allocate(i_temp(n_col)) + do i=1, n_col + i_temp(i) = b%pr(a%pr(i)) + enddo + deallocate(b%pr) + b%pr => i_temp + else + allocate(i_temp(n_col)) + do i=1, n_col + i_temp(i) = a%pr(i) + enddo + deallocate(b%pr) + b%pr => i_temp + endif + endif + if (a%pl(1).ne.0) then + if (b%pr(1).ne.0) then + allocate(i_temp(n_row)) + do i=1, n_row + i_temp(i) = a%pl(b%pl(i)) + enddo + deallocate(b%pl) + b%pl => i_temp + else + allocate(i_temp(n_row)) + do i=1, n_row + i_temp(i) = a%pl(i) + enddo + deallocate(b%pl) + b%pl => i_temp + endif + endif + + + if (debug) write (0, *) me,name,' from dcsdp ',& + &b%fida,' pl ', b%pl(:),'pr',b%pr(:) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dspcnv diff --git a/src/tools/psb_dspfree.f90 b/src/tools/psb_dspfree.f90 new file mode 100644 index 00000000..44b51a09 --- /dev/null +++ b/src/tools/psb_dspfree.f90 @@ -0,0 +1,94 @@ +! File: psb_dspfree.f90 +! +! Subroutine: psb_dspfree +! Frees a sparse matrix structure. +! +! Parameters: +! a - type(). The sparse matrix to be freed. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_dspfree(a, desc_a,info) + !...free sparse matrix structure... + use psb_descriptor_type + use psb_spmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) ::a + integer, intent(out) :: info + !...locals.... + integer :: int_err(5) + integer :: temp(1) + real(kind(1.d0)) :: real_err(5) + integer :: icontxt,nprow,npcol,me,mypcol,err, err_act + integer,parameter :: ione=1 + character(len=20) :: name, ch_err + + info=0 + name = 'psb_dspfree' + call psb_erractionsave(err_act) + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + return + else + icontxt=desc_a%matrix_data(psb_ctxt_) + end if + + !...deallocate a.... + + if ((info.eq.0).and.(.not.associated(a%pr))) info=2951 + if (info.eq.0) then + !deallocate pr field + deallocate(a%pr,stat=info) + if (info.ne.0) info=2045 + end if + if ((info.eq.0).and.(.not.associated(a%pl))) info=2952 + !deallocate pl field + if (info.eq.0) then + deallocate(a%pl,stat=info) + if (info.ne.0) info=2046 + end if + if ((info.eq.0).and.(.not.associated(a%ia2))) info=2953 + if (info.eq.0) then + !deallocate ia2 field + deallocate(a%ia2,stat=info) + if (info.ne.0) info=2047 + end if + if ((info.eq.0).and.(.not.associated(a%ia1))) info=2954 + if (info.eq.0) then + !deallocate ia1 field + deallocate(a%ia1,stat=info) + if (info.ne.0) info=2048 + endif + if ((info.eq.0).and.(.not.associated(a%aspk))) info=2955 + if (info.eq.0) then + !deallocate aspk field + deallocate(a%aspk,stat=info) + if (info.ne.0) info=2049 + endif + if (info.eq.0) call psb_nullify_sp(a) + + if(info.ne.0) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dspfree diff --git a/src/tools/psb_dspins.f90 b/src/tools/psb_dspins.f90 new file mode 100644 index 00000000..3571c076 --- /dev/null +++ b/src/tools/psb_dspins.f90 @@ -0,0 +1,154 @@ +! File: psb_dspins.f90 +! +! Subroutine: psb_dspins +! Takes a cloud of points and inserts them into a sparse matrix. +! +! Parameters: +! nz - integer. The number of points to insert. +! ia - integer,dimension(:). The row indices of the points. +! ja - integer,dimension(:). The column indices of the points. +! val - real,dimension(:). The values of the points to be inserted. +! a - type(). The sparse destination matrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! is - integer(optional). The row offset. +! js - integer(optional). The column offset. +! +subroutine psb_dspins(nz,ia,ja,val,a,desc_a,info,is,js) + + use psb_descriptor_type + use psb_spmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(inout) :: a + integer, intent(in) :: nz,ia(:),ja(:) + real(kind(1.d0)), intent(in) :: val(:) + integer, intent(out) :: info + integer, intent(in), optional :: is,js + + !locals..... + + integer :: i,icontxt,nprocs ,glob_row,row,k,start_row,end_row,& + & first_loc_row,nrow,j, err,locix,locjx,err_act,& + & dectype,mglob, nnza,m,n, pnt_halo,ncol, nh, ip, spstate + integer,pointer :: tia1(:),tia2(:), temp(:) + integer :: nprow,npcol, me ,mypcol, iflag, isize, irlc + logical, parameter :: debug=.false. + integer, parameter :: relocsz=200 + + interface psb_dscins + subroutine psb_dscins(nz,ia,ja,desc_a,info,is,js) + use typedesc + implicit none + type(desc_type), intent(inout) :: desc_a + integer, intent(in) :: nz,ia(:),ja(:) + integer, intent(out) :: info + integer, intent(in), optional :: is,js + end subroutine psb_dscins + end interface + + character(len=20) :: name, ch_err + + info = 0 + name = 'psb_dspins' + call psb_erractionsave(err_act) + + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + mglob = desc_a%matrix_data(m_) + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (npcol.ne.1) then + info = 2030 + call psb_errpush(info,name) + goto 9999 + endif + if (.not.is_ok_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + endif + + if (nz <= 0) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + if (size(ia) < nz) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + + if (size(ja) < nz) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + if (size(val) < nz) then + info = 1111 + call psb_errpush(info,name) + goto 9999 + end if + + + spstate = a%infoa(state_) + if (is_bld_dec(dectype)) then + call psb_dscins(nz,ia,ja,desc_a,info) + if (info /= 0) then + info=4010 + ch_err='psb_dscins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + + if (spstate == spmat_bld) then + call psb_coins(nz,ia,ja,val,a,desc_a%glob_to_loc,1,nrow,1,ncol,info) + if (info /= 0) then + info=4010 + ch_err='psb_coins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + info = 1123 + call psb_errpush(info,name) + goto 9999 + end if + else if (is_asb_dec(dectype)) then + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + call psb_coins(nz,ia,ja,val,a,desc_a%glob_to_loc,1,nrow,1,ncol,info) + if (info /= 0) then + info=4010 + ch_err='psb_coins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + info = 1122 + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dspins + diff --git a/src/tools/psb_dsprn.f90 b/src/tools/psb_dsprn.f90 new file mode 100644 index 00000000..90f23e79 --- /dev/null +++ b/src/tools/psb_dsprn.f90 @@ -0,0 +1,97 @@ +! File: psb_dsprn.f90 +! +! Subroutine: psb_dsprn +! Reinit sparse matrix structure for psblas routines. +! +! Parameters: +! a - type(). The sparse matrix to be reinitiated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +Subroutine psb_dsprn(a, desc_a,info) + + use psb_descriptor_type + use psb_spmat_type + use psb_serial_mod + use psb_const_mod + use psb_error_mod + Implicit None + + !....Parameters... + Type(psb_desc_type), intent(in) :: desc_a + Type(psb_dspmat_type), intent(inout) :: a + integer, intent(out) :: info + + !locals + Integer :: icontxt + Integer :: nprow,npcol,me,mypcol,err,err_act + integer, parameter :: ione=1, itwo=2,root=0 + logical, parameter :: debug=.false. + integer :: int_err(5) + real(kind(1.d0)) :: real_err(5) + character(len=20) :: name, ch_err + + info = 0 + err = 0 + int_err(1)=0 + name = 'psb_dsprn' + call psb_erractionsave(err_act) + + icontxt = desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (debug) & + &write(*,*) 'starting spalloc ',icontxt,nprow,npcol,me + + ! ....verify blacs grid correctness.. + if (npcol.ne.1) then + info = 2030 + call psb_errpush(info,name) + goto 9999 + endif + + if (debug) & + &write(*,*) 'got through igamx2d ' + + + if (.not.is_asb_dec(desc_a%matrix_data(psb_dec_type_))) then + info=590 + call psb_errpush(info,name) + goto 9999 + endif + + if (a%infoa(state_) == spmat_asb) then + + a%aspk(:) = 0.0 + if (ibits(a%infoa(upd_),2,1)==1) then + if(a%fida(1:3).eq.'JAD') then + a%ia1(a%infoa(upd_pnt_)+nnz_) = 0 + else + a%ia2(a%infoa(upd_pnt_)+nnz_) = 0 + endif + endif + a%infoa(state_) = spmat_upd + else if (a%infoa(state_) == spmat_bld) then + ! in this case do nothing. this allows sprn to be called + ! right after allocate, with spins doing the right thing. + ! hopefully :-) + else if (a%infoa(state_) == spmat_upd) then + + else + info=591 + call psb_errpush(info,name) + endif + + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_dsprn diff --git a/src/tools/psb_dspupdate.f90 b/src/tools/psb_dspupdate.f90 new file mode 100644 index 00000000..9437b593 --- /dev/null +++ b/src/tools/psb_dspupdate.f90 @@ -0,0 +1,240 @@ +! File: psb_dspupdate.f90 +! +! Subroutine: psb_dspupdate +! Updates a sparse matrix. +! +! Parameters: +! a - type(). +! ia - integer, dimension(:). +! ja - integer, dimension(:). +! blck - type(). +! desc_a - type(). +! info - integer. +! ix - integer(optional). +! jx - integer(optional). +! updflag - integer(optional). +! +subroutine psb_dspupdate(a, ia, ja, blck, desc_a,info,ix,jx,updflag) + + use psb_descriptor_type + use psb_spmat_type + use psbserial_mod + use psb_error_mod + implicit none + + !....parameters... + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) :: a + integer, intent(in) :: ia,ja + type(psb_dspmat_type), intent(in) :: blck + integer, intent(out) :: info + integer, optional, intent(in) :: ix,jx + integer, optional, intent(in) :: updflag + + !locals..... + + + interface + subroutine dcsupd(m,n,fida,descra,a,ia1,ia2,infoa,ia,ja,& + & fidh,descrh,h,ih1,ih2,infoh,ih,jh,& + & flag,glob_to_loc,iwork,liwork,ierror) + implicit none + ! .. scalar arguments .. + integer, intent(in) :: m, n, liwork,ia,ja,ih,jh, flag + integer, intent(out) :: ierror + ! .. array arguments .. + double precision, intent(in) :: h(*) + double precision, intent(inout) :: a(*) + integer, intent(in) :: ih1(*), ih2(*), infoh(10), glob_to_loc(*) + integer, intent(inout) :: ia1(*), ia2(*), infoa(10), iwork(*) + character, intent(in) :: fida*5, fidh*5,descra*11, descrh*11 + + end subroutine dcsupd + end interface + + integer :: icontxt,i,loc_row,prec_loc_row ,glob_row,row,& + & k ,start_row,end_row,first_loc_row,n_row,j,int_err(5),& + &locix,locjx,allocated_prcv, dectype, flag,err_act,err + integer,pointer :: prcv(:),gtl(:), ltg(:) + integer :: nprow,npcol, me ,mypcol, lr, lc, nrow,ncol + integer :: m,n, iupdflag + integer,pointer :: iworkaux(:) + character(len=20) :: name, ch_err + + info=0 + name='psb_dspupdate' + call psb_erractionsave(err_act) + + if (present(ix)) then + locix=ix + else + locix=1 + endif + + if (present(updflag)) then + iupdflag = updflag + else + iupdflag = upd_glb + endif + + if (present(jx)) then + locjx=jx + else + locjx=1 + endif + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + gtl => desc_a%glob_to_loc + ltg => desc_a%loc_to_glob + nrow = desc_a%matrix_data(psb_n_row_) + ncol = desc_a%matrix_data(psb_n_col_) + dectype = desc_a%matrix_data(psb_dec_type_) + ! check if a is already allocated (called psdalloc) + if (.not.is_upd_dec(dectype)) then + info = 290 + int_err(1) = dectype + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + allocate(prcv(nprow),iworkaux(3*ncol+4),stat=info) + if (info.ne.0) then + info = 2023 + int_err(1) = max(1,nprow,3*ncol+4) + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + flag = 2 + + m = blck%m + n = blck%k + + if (iupdflag == upd_glb) then + + row = ia + i = 1 + blckr: do while (i.le.m) + !loop over all blck's rows + + ! row actual block row + row = locix+i-1 + glob_row = ia+i-1 + + lr = gtl(glob_row) + + if ((1 <= lr) .and. (lr <= nrow)) then + ! at least one row belongs to me + + start_row=row + do + ! loop until actual row belong to me + ! and all actual row to insert are ordered + + prec_loc_row=loc_row + + ! --if loc_row is != -1 is already assigned + ! local index to globrow whith value loc_row + ! --if loc:row == -1 it isn't assigned local row to + ! glob_row + loc_row=gtl(glob_row) + if (start_row.eq.i) first_loc_row=loc_row + ! next blck's row + i=i+1 + if (i.le.m) then + row=locix+i-1 + glob_row=ia+i-1 + k = gtl(glob_row) + if ((.not.((1 <= lr) .and. (lr <= nrow)))& + & .or.((prec_loc_row+1.ne.loc_row).and.& + & (start_row+1.ne.i)).or.(i.gt.m)) exit + else + exit + endif + enddo + + end_row=i-1 + ! insert blck submatrix + call dcsupd(end_row-start_row+1,n,a%fida,a%descra,a%aspk,& + & a%ia1,a%ia2,a%infoa,first_loc_row, ja, blck%fida ,& + & blck%descra,blck%aspk,blck& + & %ia1,blck%ia2,blck%infoa,start_row, locjx, flag,& + & desc_a%glob_to_loc,& + & iworkaux, size(iworkaux),info) + if (info.ne.0) exit blckr + endif + ! next blck's row + i=i+1 + enddo blckr + + if (info.ne.0) then + info = 4010 + ch_err='dcsupd' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + + else if (iupdflag == upd_loc) then + + ! insert blck submatrix + call dcsupd(m,n,a%fida,a%descra,a%aspk,& + & a%ia1,a%ia2,a%infoa,ia, ja, blck%fida ,& + & blck%descra,blck%aspk,blck& + & %ia1,blck%ia2,blck%infoa,locix,locjx, flag,& + & desc_a%glob_to_loc,& + & iworkaux, size(iworkaux),info) + + if (info.ne.0) then + info = 4010 + ch_err='dcsupd' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + else + ! fix next error code + info = 999 + call psb_errpush(info,name) + goto 9999 + endif + deallocate(prcv,iworkaux,stat=info) + if (info.ne.0) then + info=2040 + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_dspupdate + + + + + + + + diff --git a/src/tools/psb_glob_to_loc.f90 b/src/tools/psb_glob_to_loc.f90 new file mode 100644 index 00000000..5b24057e --- /dev/null +++ b/src/tools/psb_glob_to_loc.f90 @@ -0,0 +1,203 @@ +! File: psb_glob_to_loc.f90 +! +! Subroutine: psb_glob_to_loc2 +! Performs global to local indexes translation +! +! Parameters: +! x - integer, dimension(:). Array containing the indices to be translated. +! y - integer, dimension(:). Array containing the indices to be translated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iact - integer(optional). A character defining the behaviour of this subroutine when is found an index not belonging to the calling process +! +subroutine psb_glob_to_loc2(x,y,desc_a,info,iact) + + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !...parameters.... + type(desc_type), intent(in) :: desc_a + integer, intent(in) :: x(:) + integer, intent(out) :: y(:), info + character, intent(in), optional :: iact + + !....locals.... + integer :: err, n, i, tmp, icontxt + character :: strings, act + integer :: int_err(5), err_act + real(kind(1.d0)) :: real_val + integer, parameter :: zero=0 + character(len=20) :: name, char_err + + info=0 + name = 'glob_to_loc' + call psb_erractionsave(err_act) + + if (present(iact)) then + act=iact + else + act='A' + endif + + int_err=0 + real_val = 0.d0 + + n=size(x) + do i=1,n + if ((x(i).gt.desc_a%matrix_data(m_)).or.& + & (x(i).le.zero)) then + if(act.eq.'I') then + y(i)=-3*desc_a%matrix_data(m_) + else + info=140 + int_err(1)=x(i) + int_err(2)=desc_a%matrix_data(m_) + exit + end if + else + tmp=desc_a%glob_to_loc(x(i)) + if((tmp.gt.zero).or.(tmp.le.desc_a%matrix_data(psb_n_col_))) then + y(i)=tmp + else if (tmp.le.zero) then + info = 150 + int_err(1)=tmp + exit + else if (tmp.gt.desc_a%matrix_data(psb_n_col_)) then + info = 140 + int_err(1)=tmp + int_err(2)=desc_a%matrix_data(psb_n_col_) + exit + end if + end if + enddo + + if (info.ne.0) then + select case(act) + case('E','I') + call psb_erractionrestore(err_act) + return + case('W') + write(0,'("Error ",i5," in subroutine glob_to_loc")') info + case('A') + call psb_errpush(info,name) + goto 9999 + end select + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + + +end subroutine psb_glob_to_loc2 + + +! Subroutine: psb_glob_to_loc +! Performs global to local indexes translation +! +! Parameters: +! x - integer, dimension(:). Array containing the indices to be translated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iact - integer(optional). A character defining the behaviour of this subroutine when is found an index not belonging to the calling process +! +subroutine psb_glob_to_loc(x,desc_a,info,iact) + + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !...parameters.... + type(desc_type), intent(in) :: desc_a + integer, intent(inout) :: x(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + + !....locals.... + integer :: n, i, tmp, icontxt, err + character :: act + integer :: int_err(5), err_act + real(kind(1.d0)) :: real_val + integer, parameter :: zero=0 + character(len=20) :: name, char_err + + info=0 + name = 'glob_to_loc' + call psb_erractionsave(err_act) + + if (present(iact)) then + act=iact + else + act='A' + endif + + real_val = 0.d0 + n=size(x) + do i=1,n + if ((x(i).gt.desc_a%matrix_data(m_)).or.& + & (x(i).le.zero)) then + if(act.eq.'I') then + x(i)=-3*desc_a%matrix_data(m_) + else + info=140 + int_err(1)=x(i) + int_err(2)=desc_a%matrix_data(m_) + exit + end if + else + tmp=desc_a%glob_to_loc(x(i)) + if((tmp.gt.zero).or.(tmp.le.desc_a%matrix_data(psb_n_col_))) then + x(i)=tmp + else if (tmp.le.zero) then + info = 150 + int_err(1)=tmp + exit + else if (tmp.ge.desc_a%matrix_data(psb_n_col_)) then + info = 140 + int_err(1)=tmp + int_err(2)=desc_a%matrix_data(psb_n_col_) + exit + end if + end if + enddo + + if (info.ne.0) then + select case(act) + case('E','I') + call psb_erractionrestore(err_act) + return + case('W') + write(0,'("Error ",i5," in subroutine glob_to_loc")') info + case('A') + call psb_errpush(info,name) + goto 9999 + end select + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + +end subroutine psb_glob_to_loc + diff --git a/src/tools/psb_ialloc.f90 b/src/tools/psb_ialloc.f90 new file mode 100644 index 00000000..0f42e216 --- /dev/null +++ b/src/tools/psb_ialloc.f90 @@ -0,0 +1,281 @@ +! File: psb_ialloc.f90 +! +! Function: psb_ialloc +! Allocates dense integer matrix for PSBLAS routines +! +! Parameters: +! m - integer. The number of rows. +! n - integer. The number of columns. +! x - integer,dimension(:,:). The matrix to be allocated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! js - integer(optional). The starting column +subroutine psb_ialloc(m, n, x, desc_a, info,js) + !....allocate dense matrix for psblas routines..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + integer, intent(in) :: m,n + integer, pointer :: x(:,:) + type(psb_desc_type), intent(inout) :: desc_a + integer, intent(out) :: info + integer, optional, intent(in) :: js + + !locals + integer :: j,nprow,npcol,me,mypcol,& + & n_col,n_row, err_act + integer :: icontxt,dectype + integer :: int_err(5),temp(1),exch(3) + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1, itwo=2, ithree=3,root=0 + character(len=20) :: name, char_err + + info=0 + name='psb_ialloc' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + dectype=desc_a%matrix_data(psb_dec_type_) + !... check m and n parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (n.lt.0) then + info = 10 + int_err(1) = 2 + int_err(2) = n + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.is_ok_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + else if (m.ne.desc_a%matrix_data(n_)) then + info = 300 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = n_ + int_err(5) = desc_a%matrix_data(n_) + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (present(js)) then + j=js + else + j=1 + endif + !global check on m and n parameters + if (me.eq.root) then + exch(1)=m + exch(2)=n + exch(3)=j + call igebs2d(icontxt,all,topdef, ithree,ione, exch, ithree) + else + call igebr2d(icontxt,all,topdef, ithree,ione, exch, ithree, root, 0) + if (exch(1).ne.m) then + info=550 + int_err(1)=1 + call psb_errpush(info,name,int_err) + goto 9999 + else if (exch(2).ne.n) then + info=550 + int_err(1)=2 + call psb_errpush(info,name,int_err) + goto 9999 + else if (exch(3).ne.j) then + info=550 + int_err(1)=3 + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + !....allocate x ..... + if (is_asb_dec(dectype).or.is_upd_dec(dectype)) then + n_col = max(1,desc_a%matrix_data(psb_n_col_)) + allocate(x(n_col,j:j+n-1),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=n_col + call psb_errpush(info,name,int_err) + goto 9999 + endif + else if (is_bld_dec(dectype)) then + n_row = max(1,desc_a%matrix_data(psb_n_row_)) + allocate(x(n_row,j:j+n-1),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=n_row + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + x = 0 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_ialloc + + + +! Function: psb_iallocv +! Allocates dense matrix for PSBLAS routines +! +! Parameters: +! m - integer. The number of rows. +! x - integer,dimension(:). The matrix to be allocated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_iallocv(m, x, desc_a, info) + !....allocate sparse matrix structure for psblas routines..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + integer, intent(in) :: m + integer, pointer :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + !locals + integer :: nprow,npcol,me,mypcol,err,n_col,n_row,dectype,err_act + integer :: icontxt + integer :: int_err(5),temp(1),exch(2) + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1, itwo=2,root=0 + logical, parameter :: debug=.false. + character(len=20) :: name, char_err + + info=0 + name='psb_iallocv' + call psb_erractionsave(err_act) + + icontxt=desc_a%matrix_data(psb_ctxt_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + dectype=desc_a%matrix_data(psb_dec_type_) + if (debug) write(0,*) 'dall: dectype',dectype + if (debug) write(0,*) 'dall: is_ok? dectype',is_ok_dec(dectype) + !... check m and n parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.is_ok_dec(dectype)) then + info = 3110 + call psb_errpush(info,name) + goto 9999 + else if (m.ne.desc_a%matrix_data(n_)) then + info = 300 + int_err(1) = 1 + int_err(2) = m + int_err(3) = 4 + int_err(4) = n_ + int_err(5) = desc_a%matrix_data(n_) + call psb_errpush(info,name,int_err) + goto 9999 + endif + + !global check on m and n parameters + if (me.eq.root) then + exch(1) = m + call igebs2d(icontxt,all,topdef, ione,ione, exch, ione) + else + call igebr2d(icontxt,all,topdef, ione,ione, exch, ione, root, 0) + if (exch(1) .ne. m) then + info = 550 + int_err(1) = 1 + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + + !....allocate x ..... + if (is_asb_dec(dectype).or.is_upd_dec(dectype)) then + n_col = max(1,desc_a%matrix_data(psb_n_col_)) + allocate(x(n_col),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=n_col + call psb_errpush(info,name,int_err) + goto 9999 + endif + else if (is_bld_dec(dectype)) then + n_row = max(1,desc_a%matrix_data(psb_n_row_)) + allocate(x(n_row),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=n_row + call psb_errpush(info,name,int_err) + goto 9999 + endif + endif + + x = 0 + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error(icontxt) + end if + return + +end subroutine psb_iallocv + diff --git a/src/tools/psb_iasb.f90 b/src/tools/psb_iasb.f90 new file mode 100644 index 00000000..07553c13 --- /dev/null +++ b/src/tools/psb_iasb.f90 @@ -0,0 +1,179 @@ +! File: psb_iasb.f90 +! +! Subroutine: psb_iasb +! Assembles a dense matrix for PSBLAS routines +! +! Parameters: +! x - integer,pointer,dimension(:,:). The matrix to be assembled. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_iasb(x, desc_a, info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:,:) + integer, intent(out) :: info + + ! local variables + integer :: icontxt,nprow,npcol,me,mypcol,temp,lwork,nrow,ncol,err_act + integer, pointer :: itemp(:,:) + integer :: int_err(5), i1sz, i2sz, dectype, i + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + character(len=20) :: name, char_err + + info=0 + name='psb_iasb' + call psb_erractionsave(err_act) + + if ((.not.associated(desc_a%matrix_data))) then + info=3110 + call psb_errpush(info,name) + return + endif + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + ! check size + icontxt=desc_a%matrix_data(psb_ctxt_) + nrow=desc_a%matrix_data(psb_n_row_) + ncol=desc_a%matrix_data(psb_n_col_) + i1sz = size(x,dim=1) + i2sz = size(x,dim=2) + if (debug) write(*,*) 'asb: ',i1sz,i2sz,nrow,ncol + if (i1sz.lt.ncol) then + allocate(itemp(ncol,i2sz),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=ncol + call psb_errpush(info,name,int_err) + goto 9999 + endif + itemp(nrow+1:,:) = 0 + itemp(1:nrow,:) = x(1:nrow,:) + deallocate(x) + x => itemp + endif + + ! ..update halo elements.. + call psb_halo(x,desc_a,info,alpha=one) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_iasb + + + +! Subroutine: psb_iasbv +! Assembles a dense matrix for PSBLAS routines +! +! Parameters: +! x - integer,pointer,dimension(:). The matrix to be assembled. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_iasbv(x, desc_a, info) + !....assembly dense matrix x ..... + use psb_descriptor_type + use psb_const_mod + use psb_psblas_mod + use psb_error_mod + implicit none + + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:) + integer, intent(out) :: info + + ! local variables + integer :: icontxt,nprow,npcol,me,mypcol,temp,lwork, err_act + integer :: int_err(5), i1sz,nrow,ncol, dectype, i + integer, pointer :: itemp(:) + real(kind(1.d0)) :: real_err(5) + integer, parameter :: ione=1 + real(kind(1.d0)),parameter :: one=1 + logical, parameter :: debug=.false. + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_iasbv' + + + icontxt=desc_a%matrix_data(psb_ctxt_) + dectype=desc_a%matrix_data(psb_dec_type_) + + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + nrow=desc_a%matrix_data(psb_n_row_) + ncol=desc_a%matrix_data(psb_n_col_) + if (debug) write(*,*) name,' sizes: ',nrow,ncol + i1sz = size(x) + if (debug) write(*,*) 'dasb: sizes ',i1sz,ncol + if (i1sz.lt.ncol) then + allocate(itemp(ncol),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=ncol + call psb_errpush(info,name,int_err) + goto 9999 + endif + itemp(nrow+1:) = 0 + itemp(1:nrow) = x(1:nrow) + deallocate(x) + x => itemp + endif + + ! ..update halo elements.. + call psb_halo(x,desc_a,info,alpha=one) + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_iasbv + diff --git a/src/tools/psb_ifree.f90 b/src/tools/psb_ifree.f90 new file mode 100644 index 00000000..e5973635 --- /dev/null +++ b/src/tools/psb_ifree.f90 @@ -0,0 +1,162 @@ +! File: psb_ifree.f90 +! +! Subroutine: psb_ifree +! frees a dense integer matrix structure +! +! Parameters: +! x - integer, pointer, dimension(:,:). The dense matrix to be freed. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_ifree(x, desc_a, info) + !...free dense matrix structure... + use psb_const_mod + use psb_descriptor_type + use psb_error_mod + implicit none + + !....parameters... + integer, pointer :: x(:,:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + + !...locals.... + integer :: int_err(5) + integer :: temp(1) + real(kind(1.d0)) :: real_err(5) + integer :: icontxt,nprow,npcol,me,mypcol,err_act + integer,parameter :: ione=1 + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_ifree' + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(x)) then + info=290 + call psb_errpush(info,name) + goto 9999 + end if + + !deallocate x + deallocate(x,stat=info) + if (info.ne.0) then + info=2045 + call psb_errpush(info,name) + goto 9999 + else + nullify(x) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_ifree + + + +! Subroutine: psb_ifreev +! frees a dense integer matrix structure +! +! Parameters: +! x - integer, pointer, dimension(:). The dense matrix to be freed. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +subroutine psb_ifreev(x, desc_a,info) + !...free dense matrix structure... + use psb_const_mod + use psb_descriptor_type + use psb_error_mod + implicit none + !....parameters... + integer, pointer :: x(:) + type(psb_desc_type), intent(in) :: desc_a + integer, intent(out) :: info + !...locals.... + integer :: int_err(5) + integer :: temp(1) + real(kind(1.d0)) :: real_err(5) + integer :: icontxt,nprow,npcol,me,mypcol,err_act + integer,parameter :: ione=1 + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_ifreev' + + + if (.not.associated(desc_a%matrix_data)) then + info=295 + call psb_errpush(info,name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + ! ....verify blacs grid correctness.. + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(x)) then + info=290 + call psb_errpush(info,name,int_err) + goto 9999 + end if + + !deallocate x + deallocate(x,stat=info) + if (info.ne.0) then + info=2045 + call psb_errpush(info,name,int_err) + goto 9999 + else + nullify(x) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_ifreev diff --git a/src/tools/psb_iins.f90 b/src/tools/psb_iins.f90 new file mode 100644 index 00000000..1e77d718 --- /dev/null +++ b/src/tools/psb_iins.f90 @@ -0,0 +1,391 @@ +! File: psb_iins.f90 +! +! Subroutine: psb_iins +! Insert dense integer submatrix to dense integer matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! n - integer. Cols number of submatrix belonging to blck to be inserted. +! x - integer, pointer, dimension(:,:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! jx - integer. x global-col corresponding to position at which blck submatrix must be inserted. +! blck - integer, pointer, dimension(:,:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +! jblck - integer(optional). First col of submatrix belonging to blck to be inserted. +subroutine psb_iins(m, n, x, ix, jx, blck, desc_a, info,& + & iblck, jblck) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !....parameters... + integer, intent(in) :: m,n + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:,:) + integer, intent(in) :: ix,jx + integer, intent(in) :: blck(:,:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck,jblck + + !locals..... + + integer :: icontxt,i,loc_row,glob_row,& + & loc_cols,col,iblock, jblock, mglob + integer :: nprow,npcol, me ,mypcol, int_err(5),err_act + character(len=20) :: name, ch_err + + info=0 + call psb_erractionsave(err_act) + name = 'psb_iins' + + + if ((.not.associated(desc_a%matrix_data))) then + info=3110 + call psb_errpush(info, name) + return + end if + + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(desc_a%glob_to_loc)) then + info=3110 + call psb_errpush(info,name,int_err) + goto 9999 + end if + + !... check parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (n.lt.0) then + info = 10 + int_err(1) = 2 + int_err(2) = n + call psb_errpush(info,name,int_err) + goto 9999 + else if (ix.lt.1) then + info = 20 + int_err(1) = 6 + int_err(2) = ix + call psb_errpush(info,name,int_err) + goto 9999 + else if (jx.lt.1) then + info = 20 + int_err(1) = 7 + int_err(2) = jx + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.is_ok_dec(desc_a%matrix_data(psb_dec_type_))) then + info = 3110 + int_err(1) = desc_a%matrix_data(psb_dec_type_) + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=1).lt.desc_a%matrix_data(psb_n_row_)) then + info = 310 + int_err(1) = 5 + int_err(2) = 4 + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=2).lt.n) then + ! check if dimension of x is greater than dimension of submatrix + ! to insert + info = 320 + int_err(1) = 2 + int_err(2) = size(x, dim=2) + int_err(3) = n + call psb_errpush(info,name,int_err) + goto 9999 + endif + + loc_cols = desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(m_) + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + + if (present(jblck)) then + jblock = jblck + else + jblock = 1 + endif + + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + do col = 1, n + x(loc_row,jx+col-1) = blck(iblock+i-1,jblock+col-1) + enddo + end if + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_iins + + + + + +! Subroutine: psb_iinsvm +! Insert dense integer submatrix to dense integer matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! x - integer, pointer, dimension(:,:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! jx - integer. x global-col corresponding to position at which blck submatrix must be inserted. +! blck - integer, pointer, dimension(:,:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +subroutine psb_iinsvm(m, x, ix, jx, blck, desc_a, info,& + & iblck) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + ! m rows number of submatrix belonging to blck to be inserted + + ! iblck first row of submatrix belonging to blck to be inserted + + ! ix x global-row corresponding to position at which blck submatrix + ! must be inserted + + ! jx x global-col corresponding to position at which blck submatrix + ! must be inserted + + !....parameters... + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:,:) + integer, intent(in) :: ix,jx + integer, intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + + !locals..... + integer :: icontxt,i,loc_row,glob_row,& + & loc_cols,iblock, jblock,mglob, err_act, int_err(5) + integer :: nprow,npcol, me ,mypcol + character(len=20) :: name, ch_err + + info=0 + name = 'psb_iinsvm' + call psb_erractionsave(err_act) + + + loc_cols=desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(m_) + + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + x(loc_row,jx) = blck(iblock+i-1) + end if + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_iinsvm + + + +! Subroutine: psb_iinsvv +! Insert dense integer submatrix to dense integer matrix. +! +! Parameters: +! m - integer. Rows number of submatrix belonging to blck to be inserted. +! x - integer, pointer, dimension(:,:). The destination dense matrix. +! ix - integer. x global-row corresponding to position at which blck submatrix must be inserted. +! blck - integer, pointer, dimension(:,:). The source dense submatrix. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code +! iblck - integer(optional). First row of submatrix belonging to blck to be inserted. +subroutine psb_iinsvv(m, x, ix, blck, desc_a, info,& + & iblck) + !....insert dense submatrix to dense matrix ..... + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + ! m rows number of submatrix belonging to blck to be inserted + + ! iblck first row of submatrix belonging to blck to be inserted + + ! ix x global-row corresponding to position at which blck submatrix + ! must be inserted + + !....parameters... + integer, intent(in) :: m + type(psb_desc_type), intent(in) :: desc_a + integer, pointer :: x(:) + integer, intent(in) :: ix + integer, intent(in) :: blck(:) + integer, intent(out) :: info + integer, optional, intent(in) :: iblck + + !locals..... + integer :: icontxt,i,loc_row,glob_row,k,& + & loc_rows,loc_cols,col,iblock, jblock, mglob, err_act, int_err(5) + integer :: nprow,npcol, me ,mypcol + character(len=20) :: name, ch_err + + info=0 + name = 'psb_iinsvv' + call psb_erractionsave(err_act) + + if ((.not.associated(desc_a%matrix_data))) then + info=3110 + call psb_errpush(info,name) + return + end if + icontxt=desc_a%matrix_data(psb_ctxt_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.associated(desc_a%glob_to_loc)) then + info=3110 + call psb_errpush(info,name) + goto 9999 + end if + + !... check parameters.... + if (m.lt.0) then + info = 10 + int_err(1) = 1 + int_err(2) = m + call psb_errpush(info,name,int_err) + goto 9999 + else if (ix.lt.1) then + info = 20 + int_err(1) = 6 + int_err(2) = ix + call psb_errpush(info,name,int_err) + goto 9999 + else if (.not.is_ok_dec(desc_a%matrix_data(psb_dec_type_))) then + info = 3110 + int_err(1) = desc_a%matrix_data(psb_dec_type_) + call psb_errpush(info,name,int_err) + goto 9999 + else if (size(x, dim=1).lt.desc_a%matrix_data(psb_n_row_)) then + info = 310 + int_err(1) = 5 + int_err(2) = 4 + call psb_errpush(info,name,int_err) + goto 9999 + endif + + loc_rows=desc_a%matrix_data(psb_n_row_) + loc_cols=desc_a%matrix_data(psb_n_col_) + mglob = desc_a%matrix_data(m_) + + if (present(iblck)) then + iblock = iblck + else + iblock = 1 + endif + + do i = 1, m + !loop over all blck's rows + + ! row actual block row + glob_row=ix+i-1 + if (glob_row > mglob) exit + + loc_row=desc_a%glob_to_loc(glob_row) + if (loc_row.ge.1) then + ! this row belongs to me + ! copy i-th row of block blck in x + x(loc_row) = blck(iblock+i-1) + end if + enddo + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_iinsvv + + diff --git a/src/tools/psb_loc_to_glob.f90 b/src/tools/psb_loc_to_glob.f90 new file mode 100644 index 00000000..8e09fabf --- /dev/null +++ b/src/tools/psb_loc_to_glob.f90 @@ -0,0 +1,185 @@ +! File: psb_loc_to_glob.f90 +! +! Subroutine: psb_loc_to_glob2 +! Performs local to global indexes translation +! +! Parameters: +! x - integer, dimension(:). Array containing the indices to be translated. +! y - integer, dimension(:). Array containing the indices to be translated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iact - integer(optional). A character defining the behaviour of this subroutine when is found an index not belonging to the calling process +! +subroutine psb_loc_to_glob2(x,y,desc_a,info,iact) + + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !...parameters.... + type(psb_desc_type), intent(in) :: desc_a + integer, intent(in) :: x(:) + integer, intent(out) :: y(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + + !....locals.... + integer :: err, n, i, tmp, icontxt + character :: strings,act + integer :: int_err(5), err_act + real(kind(1.d0)) :: real_val + integer, parameter :: zero=0 + character(len=20) :: name, char_err + + info=0 + name='psb_loc_to_glob2' + call psb_erractionsave(err_act) + + if (present(iact)) then + act=iact + else + act='A' + endif + + real_val = 0.d0 + + n=size(x) + do i=1,n + if ((x(i).gt.desc_a%matrix_data(psb_n_col_)).or.& + & (x(i).le.zero)) then + info=140 + int_err(1)=tmp + int_err(2)=desc_a%matrix_data(m_) + exit + else + tmp=desc_a%loc_to_glob(x(i)) + if((tmp.gt.zero).or.(tmp.le.desc_a%matrix_data(m_))) then + y(i)=tmp + else + info = 140 + int_err(1)=tmp + int_err(2)=desc_a%matrix_data(psb_n_col_) + exit + end if + end if + enddo + + if (info.ne.0) then + select case(act) + case('E') + call psb_erractionrestore(err_act) + return + case('W') + write(0,'("Error ",i5," in subroutine glob_to_loc")') info + case('A') + call psb_errpush(info,name) + goto 9999 + end select + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + +end subroutine psb_loc_to_glob2 + + +! Subroutine: psb_loc_to_glob +! Performs local to global indexes translation +! +! Parameters: +! x - integer, dimension(:). Array containing the indices to be translated. +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! iact - integer(optional). A character defining the behaviour of this subroutine when is found an index not belonging to the calling process +! +subroutine psb_loc_to_glob(x,desc_a,info,iact) + + use psb_descriptor_type + use psb_const_mod + use psb_error_mod + implicit none + + !...parameters.... + type(psb_desc_type), intent(in) :: desc_a + integer, intent(inout) :: x(:) + integer, intent(out) :: info + character, intent(in), optional :: iact + + !....locals.... + integer :: err, n ,i, tmp,icontxt, err_act + character :: act + integer :: int_err(5) + real(kind(1.d0)) :: real_val + integer, parameter :: zero=0 + character(len=20) :: name, char_err + + info=0 + name='psb_loc_to_glob' + call psb_erractionsave(err_act) + + if (present(iact)) then + act=iact + else + act='A' + endif + + real_val = 0.d0 + + n=size(x) + do i=1,n + if ((x(i).gt.desc_a%matrix_data(psb_n_col_)).or.& + & (x(i).le.zero)) then + info=140 + int_err(1)=x(i) + int_err(2)=desc_a%matrix_data(psb_n_col_) + exit + else + tmp=desc_a%loc_to_glob(x(i)) + if((tmp.gt.zero).or.(tmp.le.desc_a%matrix_data(m_))) then + x(i)=tmp + else + info = 140 + exit + end if + end if + enddo + + if (info.ne.0) then + select case(act) + case('E') + call psb_erractionrestore(err_act) + return + case('W') + write(0,'("Error ",i5," in subroutine glob_to_loc")') info + case('A') + call psb_errpush(info,name) + goto 9999 + end select + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + + if (err_act.eq.act_ret) then + return + else + call psb_error() + end if + return + +end subroutine psb_loc_to_glob + diff --git a/src/tools/psb_ptasb.f90 b/src/tools/psb_ptasb.f90 new file mode 100644 index 00000000..764c4aac --- /dev/null +++ b/src/tools/psb_ptasb.f90 @@ -0,0 +1,218 @@ +! File: psb_ptasb.f90 +! +! Subroutine: psb_ptasb +! ??? +! +! Parameters: +! desc_a - type(). The communication descriptor. +! info - integer. Eventually returns an error code. +! +subroutine psb_ptasb(desc_a,info) + + use psb_descriptor_type + use psb_serial_mod + use psb_const_mod + use psi_mod + use psb_error_mod + implicit none + + + !...parameters.... + type(psb_desc_type), intent(inout) :: desc_a + integer,intent(out) :: info + !....locals.... + integer :: int_err(5) + integer,pointer :: ovrlap_index(:),halo_index(:) + real(kind(1.d0)) :: real_err(5) + integer,pointer :: work5(:) + integer :: err_act,& + & i,nprow,npcol,me,mypcol ,size_req,& + & lovrlap,lhalo,nhalo,novrlap,max_size,max_size1,& + & max_halo,size_req1,n_col,lwork5,ldesc_halo,& + & ldesc_ovrlap, dectype + integer :: icontxt,temp(1),n_row + integer, parameter :: ione=1 + real(kind(1.d0)) :: time(10), mpi_wtime + external mpi_wtime + logical, parameter :: debug=.false., debugwrt=.false. + character(len=20) :: name, ch_err + + info=0 + name = 'psb_ptasb' + call psb_erractionsave(err_act) + + time(1) = mpi_wtime() + + + icontxt = desc_a%matrix_data(psb_ctxt_) + dectype = desc_a%matrix_data(psb_dec_type_) + n_row = desc_a%matrix_data(psb_n_row_) + n_col = desc_a%matrix_data(psb_n_col_) + + ! check on blacs grid + call blacs_gridinfo(icontxt, nprow, npcol, me, mypcol) + if (nprow.eq.-1) then + info = 2010 + call psb_errpush(info,name) + goto 9999 + else if (npcol.ne.1) then + info = 2030 + int_err(1) = npcol + call psb_errpush(info,name,int_err) + goto 9999 + endif + + if (.not.is_ok_dec(dectype)) then + info = 600 + int_err(1) = dectype + call psb_errpush(info, name, int_err) + goto 9999 + endif + + if (debug) write (*, *) ' begin matrix assembly...' + + if (is_bld_dec(dectype)) then + if (debug) write(0,*) 'ptasb: checking rows insertion' + ! check if all local row are inserted + do i=1,desc_a%matrix_data(psb_n_col_) + if (desc_a%loc_to_glob(i).lt.0) then + write(0,*) 'error on index: ',i,desc_a%loc_to_glob(i) + info=3100 + exit + endif + enddo + + if(info.ne.no_err) then + call psb_errpush(info, name) + goto 9999 + end if + + ! comm desc_size is size requested for temporary comm descriptors + ! (expressed in no of dble element) + ldesc_halo = (((3*(n_col-n_row)+1)+1)) + ovrlap_index => desc_a%ovrlap_index + nullify(desc_a%ovrlap_index) + halo_index => desc_a%halo_index + nullify(desc_a%halo_index) + + lhalo = 1 + do while (halo_index(lhalo) /= -1) + lhalo = lhalo + 1 + enddo + nhalo = (lhalo-1)/3 + lovrlap=1 + do while (ovrlap_index(lovrlap) /= -1) + lovrlap=lovrlap+1 + enddo + novrlap = (lovrlap-1)/3 + + if (debug) write(0,*) 'ptasb: from asbx',& + & nhalo,lhalo,halo_index(lhalo) + + ! allocate final comm psblas descriptors + + ! compute necessary dimension of halo index + max_halo=max(nhalo,1) + max_size= max(1,min(3*desc_a%matrix_data(psb_n_row_),novrlap*3)) + max_size1=max_size + + call igamx2d(icontxt, all, topdef, ione, ione, max_size,& + & ione,temp ,temp,-ione ,-ione,-ione) + call igamx2d(icontxt, all, topdef, ione, ione,max_halo,& + & ione,temp ,temp,-ione ,-ione,-ione) + + ldesc_halo=3*max_halo+3*nhalo+1 + + ! allocate halo_index field + allocate(desc_a%halo_index(ldesc_halo),stat=info) + ! check on allocate + if (info.ne.0) then + info=2023 + int_err(1)=ldesc_halo + call psb_errpush(info, name, int_err) + goto 9999 + endif + + ! compute necessary dimension of ovrlap index + ldesc_ovrlap=2*lovrlap+1 + + ! allocate ovrlap_index field + allocate(desc_a%ovrlap_index(ldesc_ovrlap),stat=info) + ! check on allocate + if (info.ne.0) then + info=2023 + int_err(1)=ldesc_ovrlap + call psb_errpush(info, name, int_err) + goto 9999 + endif + + size_req1=max((max_size+max_size1),(nprow*4)*(nprow+1)*5) + + size_req=((nprow*4)*(nprow+1)*5)+(nprow+1)+(max& + & (nhalo,size_req1)+1) + if (info.ne.0) info =2040 + allocate(work5(size_req),stat=info) + if (info.ne.0) then + info=2025 + int_err(1)=size_req + call psb_errpush(info, name, int_err) + goto 9999 + endif + lwork5=size(work5) + + if (debug) write(0,*) 'ptasb: calling convert_comm',& + & nhalo,lhalo,halo_index(lhalo) + !.... convert comunication stuctures.... + call psi_convert_comm(desc_a%matrix_data,& + & halo_index, ovrlap_index,& + & desc_a%halo_index,size(desc_a%halo_index),& + & desc_a%ovrlap_index,size(desc_a%ovrlap_index),& + & desc_a%ovrlap_elem,size(desc_a%ovrlap_elem),& + & desc_a%bnd_elem,& + & desc_a%loc_to_glob,desc_a%glob_to_loc,info) + if(info /= 0) then + info=4010 + ch_err='psi_convert_comm' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! ok, register into matrix_data & free temporary work areas + desc_a%matrix_data(psb_dec_type_) = desc_asb + deallocate(halo_index,ovrlap_index,& + & work5, stat=info) + if (info.ne.0) then + info =2040 + call psb_errpush(info, name, int_err) + goto 9999 + end if + + else + info = 600 + if (debug) write(0,*) 'dectype 2 :',dectype,desc_bld,& + &desc_asb,desc_upd + call psb_errpush(info, name, int_err) + goto 9999 + endif + + time(4) = mpi_wtime() + time(4) = time(4) - time(3) + if (debug) then + call dgamx2d(icontxt, all, topdef, ione, ione, time(4),& + & ione,temp ,temp,-ione ,-ione,-ione) + + write (*, *) ' comm structs assembly: ', time(4)*1.d-3 + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + +end subroutine psb_ptasb diff --git a/test/Fileread/Makefile b/test/Fileread/Makefile new file mode 100644 index 00000000..ec3664d2 --- /dev/null +++ b/test/Fileread/Makefile @@ -0,0 +1,133 @@ +include ../../Make.inc +# +# Libraries used +# +LIBDIR=../../LIB/ +PSBLAS_LIB= -L$(LIBDIR) -lpsblas +SPARKER_LIB= -L$(LIBDIR) -lsparker +BLAS90LIB=-L$(LIBDIR) -lpsblas90 $(SLU) +#METHD90LIB=-L$(LIBDIR) -lmethd90 +#TOOLS90LIB=-L$(LIBDIR) -ltools90 +#PREC90LIB=-L$(LIBDIR) -lprec90 + + + +# +# We are using the public domain tool METIS from U. Minnesota. To get it +# check URL http://www.cs.umn.edu:~karypis +# +METIS_LIB = -L$(HOME)/NUMERICAL/metis-4.0 -lmetis + + +# +CCOPT= -g +INCDIRS=-I$(LIBDIR) + + +TMMOBJS=partgraph.o part_block.o read_mat.o getp.o \ + mmio.o mat_dist.o testmm.o +DFOBJS=partgraph.o part_block.o read_mat.o getp.o \ + mmio.o mat_dist.o df_sample.o lowerc.o part_blk2.o +DAOBJS=partgraph.o part_block.o getp.o \ + mmio.o mat_dist.o read_mat.o d_aggr.o part_blk2.o lowerc.o +DFLOBJS=partgraph.o part_block.o read_mat.o getp.o \ + mmio.o mat_dist.o df_samplelog.o + +ZFOBJS=partgraph.o part_block.o read_mat.o getp.o \ + mmio.o mat_dist.o zf_sample.o +DFMOBJS=partgraph.o part_block.o mmio.o read_mat.o \ + mat_dist.o df_samplem.o part_blk2.o lowerc.o +DFBOBJS=partgraph.o part_block.o mmio.o read_mat.o \ + mat_dist.o comm_info.o df_bench.o part_blk2.o lowerc.o +DFCOBJS=partgraph.o part_block.o mmio.o read_mat.o \ + mat_dist.o comm_info.o df_comm.o part_blk2.o lowerc.o + + + +EXEDIR=./RUNS + +all: df_sample zf_sample df_samplelog testmm df_samplem + +read_mat.o: mmio.o + +df_sample: $(DFOBJS) + $(F90LINK) $(LINKOPT) $(DFOBJS) -o df_sample\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv df_sample $(EXEDIR) + +d_aggr: $(DAOBJS) + $(F90LINK) $(LINKOPT) $(DAOBJS) -o d_aggr\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv d_aggr $(EXEDIR) + + +df_samplem: $(DFMOBJS) + $(F90LINK) $(LINKOPT) $(DFMOBJS) -o df_samplem\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv df_samplem $(EXEDIR) +df_bench: $(DFBOBJS) + $(F90LINK) $(LINKOPT) $(DFBOBJS) -o df_bench\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv df_bench $(EXEDIR) + +df_comm: $(DFCOBJS) + $(F90LINK) $(LINKOPT) $(DFCOBJS) -o df_comm\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv df_comm $(EXEDIR) + +testmm: $(TMMOBJS) + $(F90LINK) $(LINKOPT) $(TMMOBJS) -o testmm\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(TOOLS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) + /bin/mv testmm $(EXEDIR) + +df_samplelog: $(DFLOBJS) + $(F90LINK) $(LINKOPT) $(DFLOBJS) -o df_samplelog\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(SPARKER_LIB) $(PREC90LIB) $(BLAS90LIB) $(PSBLAS_LIB) \ + $(BLAS) $(SPARKER_LIB) $(BLACS) $(BLAS) -llmpe -lmpe + /bin/mv df_samplelog $(EXEDIR) + +zf_sample: $(ZFOBJS) + $(F90LINK) $(LINKOPT) $(ZFOBJS) -o zf_sample\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv zf_sample $(EXEDIR) + +aggr.o: mmio.o +aggr: aggr.o mmio.o + $(F90LINK) $(LINKOPT) aggr.o mmio.o -o aggr \ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) $(METIS_LIB)\ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv aggr $(EXEDIR) + +.f90.o: + $(MPF90) $(F90COPT) $(INCDIRS) -c $< +#mmio.o: mmio.f90 +# $(MPF90) -O0 $(INCDIRS) -c $< + + +#$(DFLOBJS) $(ZFOBJS) $(DFOBJS):$(MODS) + +clean: + /bin/rm -f $(FOBJS) ppde90.o part_block.o $(DFOBJS) $(DFBOBJS) $(DFCOBJS) $(ZHOBJS) $(DAOBJS)\ + *$(.mod) $(EXEDIR)/df_sample $(EXEDIR)/zf_sample $(EXEDIR)/df_comm $(EXEDIR)/df_samplelog $(EXEDIR)/df_bench + +lib: + (cd ../../; make lib) +verycleanlib: + (cd ../../; make veryclean) + diff --git a/test/Fileread/RUNS/Makefile b/test/Fileread/RUNS/Makefile new file mode 100644 index 00000000..3a5efd31 --- /dev/null +++ b/test/Fileread/RUNS/Makefile @@ -0,0 +1,15 @@ +lib: + (cd ..; $(MAKE) lib) +clean: + (cd ..; $(MAKE) clean) +verycleanlib: + (cd ..; $(MAKE) verycleanlib) +testmm: + (cd ..; $(MAKE) testmm) +df_sample: + (cd ..; $(MAKE) clean clean df_sample) +df_samplem: + (cd ..; $(MAKE) clean clean df_samplem) +zf_sample: + (cd ..; $(MAKE) clean zf_sample) +.PHONY: df_sample zf_sample testmm df_samplem diff --git a/test/Fileread/RUNS/rtb.inp b/test/Fileread/RUNS/rtb.inp new file mode 100644 index 00000000..51eeeb77 --- /dev/null +++ b/test/Fileread/RUNS/rtb.inp @@ -0,0 +1,48 @@ +BICGSTAB iterative method to use +CSR Matrix format +2 IPART: Partition method: 0: BLOCK 1: BLK2 2:GRAPH +00100 ITMAX +-1 ITRACE +2 ISTOPC 1: NBE Infty 2: |r|2/|b|2 +10 IRST Restart parameter for GMRES and BiCGSTAB(L) +0 RENUM: 0: none 1: global indices (2: GPS band reduction) +1 NTRY for each comb. print out best timings +1 6 13 NPRECS 0:none 1:diagsc 2:BJC 3:ASM 4:ASH 5:RAS 6:RASH +14 2 5 10 11 14 15 61 55 0 1 2 3 4 5 6 7 8 9 10 11 12 3 12 9 0 1 2 5 3 +2 Number of overlaps to try (only when iprec>2) +2 1 0 1 2 4 +1.D-6 EPS +0.38d0 OMEGA for smoothed preconditioners (10 & 11) +-1 GLBSMTH -1 no 1 yes for smoothed preconditioners (10 & 11) +1 MATOP 0 no store 1 store +4 Jacobi sweeps for precs. 14 & 64 +2 Number of matrices +kivap004.mtx none +sherman3.mtx none +nos4.mtx none +sherman3.mtx none +kivap004.mtx none +sherman3.mtx none +kivap001.mtx none +kivap007.mtx none +kivap004.mtx none +180x180_2659.mtx none +thm200x120.mtx none +sherman3.mtx none +kivap001.mtx none +kivap004.mtx none +kivap007.mtx none +sherman3.mtx none +kivap007.mtx none +126150.mtx none +sherman3.mtx none +kivap001.mtx none +sherman3.mtx none +matpress.mtx none +kivap004.mtx none +a_sparse_gps.mtx none +mx2000.mtx mx2000.rhs +sylbes.mtx sylbes.rhs +mx200.mtx mx200.rhs +sylbes.mtx sylbes.rhs +mx2000.mtx mx2000.rhs diff --git a/test/Fileread/RUNS/rtcomm.inp b/test/Fileread/RUNS/rtcomm.inp new file mode 100644 index 00000000..1dc5ea13 --- /dev/null +++ b/test/Fileread/RUNS/rtcomm.inp @@ -0,0 +1,21 @@ +2 IPART: Partition method: 0: BLOCK 1: BLK2 2:GRAPH +18 Number of matrices +bcsstk14.mtx +bcsstk17.mtx +e40r3000.mtx +kivap001.mtx +kivap002.mtx +kivap003.mtx +kivap004.mtx +kivap005.mtx +kivap006.mtx +kivap007.mtx +kivap008.mtx +kivap009.mtx +kivat001.mtx +kv300p01.mtx +matpress.mtx +raefsky3.mtx +sherman3.mtx +thm1000x600.mtx + diff --git a/test/Fileread/RUNS/rtm.inp b/test/Fileread/RUNS/rtm.inp new file mode 100644 index 00000000..29286ae1 --- /dev/null +++ b/test/Fileread/RUNS/rtm.inp @@ -0,0 +1,19 @@ +BICGSTAB iterative method to use +CSR Matrix format +1 IPART: Partition method: 0: BLOCK 1: BLK2 2:GRAPH +00800 ITMAX +-1 ITRACE +2 ISTOPC 1: NBE Infty 2: |r|2/|b|2 +0 RENUM: 0: none 1: global indices (2: GPS band reduction) +2 NPRECS 0:none 1:diagsc 2:BJC 3:ASM 4:ASH 5:RAS 6:RASH +5 8 2 7 8 7 8 0 1 2 5 3 4 5 6 7 0 1 2 3 4 5 6 +4 2 Number of overlaps to try (only when iprec>2) +0 1 2 4 +1.D-6 EPS +1 Number of matrices +kivap007.mtx none +kivap001.mtx none +kivap004.mtx none +kivap009.mtx none +try400.mtx none +kp1st.mtx none diff --git a/test/Fileread/RUNS/rtst.inp b/test/Fileread/RUNS/rtst.inp new file mode 100644 index 00000000..69b2db56 --- /dev/null +++ b/test/Fileread/RUNS/rtst.inp @@ -0,0 +1,13 @@ +11 Number of inputs +kivap007.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ +NONE +BICGSTAB +ILU !!!! Actually, it's IPREC below. Should take this line out. +CSR +0 IPART: Partition method +2 ISTOPC +00800 ITMAX +6 ITRACE +2 IPREC 0:NONE 1:DIAGSC 2:ILU 3: AS 4: RAS 5,6: variants +1 ML +1.d-6 EPS diff --git a/test/Fileread/RUNS/tst.inp b/test/Fileread/RUNS/tst.inp new file mode 100644 index 00000000..965661ad --- /dev/null +++ b/test/Fileread/RUNS/tst.inp @@ -0,0 +1,14 @@ + +12 Number of inputs +dwg961a.mtx This (and others) from: http://math.nist.gov/MatrixMarket/ +NONE +BICGSTAB +NONE +CSR +2 IPART: Partition method +1 ISTOPC +15800 ITMAX +6 ITRACE +2 IPREC +3 ML +1.D-12 EPS diff --git a/test/Fileread/comm_info.f90 b/test/Fileread/comm_info.f90 new file mode 100644 index 00000000..b4be0af5 --- /dev/null +++ b/test/Fileread/comm_info.f90 @@ -0,0 +1,56 @@ +module comminfo +contains + + + subroutine get_comminfo(icontxt,desc_a,comm_info) + use typedesc + implicit none + + type(desc_type) :: desc_a + integer,pointer:: comm_info(:,:) + integer :: icontxt, nprow, npcol, myprow, mypcol,& + & i,cnt,proc,n_elem_recv,n_elem_send + integer,pointer:: sndbuf(:) + + + + call blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) + +! write(0,*)'inside comminfo',nprow,npcol,myprow,mypcol + allocate(sndbuf(nprow)) + sndbuf(:)=0 + cnt=1 + + do while(desc_a%halo_index(cnt).ne.-1) + proc=desc_a%halo_index(cnt+proc_id_) + n_elem_recv=desc_a%halo_index(cnt+n_elem_recv_) + n_elem_send=desc_a%halo_index(cnt+n_elem_recv+n_elem_send_) + cnt=cnt+n_elem_recv+n_elem_send+3 + sndbuf(proc+1)=n_elem_send + end do + + + if(myprow.eq.0) then + comm_info(1,:)=sndbuf(:) + deallocate(sndbuf) + do i=1,nprow-1 + sndbuf=>comm_info(i+1,:) +! call igerv2d( icontxt, 1, nprow, comm_info(i+1,:), nprow, i, 0) + call igerv2d( icontxt, nprow,1, sndbuf, nprow, i, 0 ) +! write(0,'("Root has received from process n.",i3)'),i +! write(0,*) comm_info(i+1,:) +! write(0,*) sndbuf + end do + + else + +! write(0,'("Process n.",i3," is sending to root")'),myprow +! write(0,*) sndbuf + call igesd2d( icontxt, nprow,1, sndbuf, nprow, 0, 0 ) + + end if + + end subroutine get_comminfo + + +end module comminfo diff --git a/test/Fileread/desym.f b/test/Fileread/desym.f new file mode 100644 index 00000000..be11654f --- /dev/null +++ b/test/Fileread/desym.f @@ -0,0 +1,157 @@ +* SUBROUTINE DESYM(NROW,A,JA,IA,AS,JAS,IAS,IAW,NNZERO) * +* * +* Purpose * +* ======= * +* Utility routine to convert from symmetric storage * +* to full format (CSR mode). * +* * +* Parameter * +* ========= * +* INPUT= * +* * +* SYMBOLIC NAME: NROW * +* POSITION: Parameter No.1 * +* ATTRIBUTES: INTEGER * +* VALUES: NROW>0 * +* DESCRIPTION: On entry NROW specifies the number of rows of the * +* input sparse matrix. The number of column of the input * +* sparse matrix mest be the same. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: A * +* POSITION: Parameter No.2 * +* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (NNZERO) * +* VALUES: * +* DESCRIPTION: A specifies the values of the input sparse matrix. * +* This matrix is stored in CSR mode * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: JA * +* POSITION: Parameter No. 3 * +* ATTRIBUTES: INTEGER ARRAY(IA(NNZERO)) * +* VALUES: > 0 * +* DESCRIPTION: Column indices stored by rows refered to the input * +* sparse matrix. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: IA * +* POSITION: Parameter No. 4 * +* ATTRIBUTES: INTEGER ARRAY(NROW+1) * +* VALUES: >0; increasing. * +* DESCRIPTION: Row pointer array: it contains the starting * +* position of each row of A in array JA. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: IAW * +* POSITION: Parameter No. 7 * +* ATTRIBUTES: INTEGER ARRAY of Dimension (NROW+1) * +* VALUES: >0; * +* DESCRIPTION: Work Area. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: Parameter No. 8 * +* ATTRIBUTES: REAL*8 ARRAY of Dimension (NROW+1) * +* VALUES: >0; * +* DESCRIPTION: Work Area. * +* * +* SYMBOLIC NAME: NNZERO * +* POSITION: Parameter No. 9 * +* ATTRIBUTES: INTEGER * +* VALUES: >0; * +* DESCRIPTION: On entry contains: the number of the non zero * +* entry of the input matrix. * +* Unchanged on exit. * +* OUTPUT== * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: Parameter No.5 * +* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (*) * +* VALUES: * +* DESCRIPTION: On exit A specifies the values of the output sparse * +* matrix. * +* This matrix correspondes to A rapresented in FULL-CSR * +* mode * +* * +* SYMBOLIC NAME: JAS * +* POSITION: Parameter No. 6 * +* ATTRIBUTES: INTEGER ARRAY(IAS(NROW+1)-1) * +* VALUES: > 0 * +* DESCRIPTION: Column indices stored by rows refered to the output * +* sparse matrix. * +* * +* SYMBOLIC NAME: IAS * +* POSITION: Parameter No. S * +* ATTRIBUTES: INTEGER ARRAY(NROW+1) * +* VALUES: >0; increasing. * +* DESCRIPTION: Row pointer array: it contains the starting * +* position of each row of AS in array JAS. * +***************************************************************************** + +C + SUBROUTINE DESYM(NROW,A,JA,IA,AS,JAS,IAS,AUX,WORK,NNZERO, + + PTR, NZR, VALUE) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER NROW,NNZERO,VALUE,INDEX,PTR, NZR +C .. Array Arguments .. + DOUBLE PRECISION A(*),AS(*),WORK(*) + INTEGER IA(*),IAS(*),JAS(*),JA(*),AUX(*) +C .. Local Scalars .. + INTEGER I,IAW1,IAW2,IAWT,J,JPT,K,KPT,LDIM,NZL,JS,IRET,NEL,DIAGEL +C REAL*8 BUF +C .. + + + NEL = 0 + DIAGEL=0 + + DO I=1, NNZERO + IF(JA(I).LE.IA(I)) THEN + NEL = NEL+1 + AS(I) = A(I) + JAS(I) = JA(I) + IAS(I) = IA(I) + IF(JA(I).NE.IA(I)) THEN !This control avoids malfunctions in the cases + ! where the matrix is declared symmetric but all + !his elements are explicitly stored + ! see young1c.mtx from "Matrix Market" + AS(NNZERO+I) = A(I) + JAS(NNZERO+I) = IA(I) + IAS(NNZERO+I) = JA(I) + ELSE + DIAGEL = DIAGEL+1 + END IF + END IF + END DO + + +C .... Order with key IAS ... + CALL MRGSRT(2*NNZERO,IAS,AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(2*NNZERO,AS,IAS,JAS,AUX) +C .... Order with key JAS ... + + I = 1 + J = I + DO WHILE (I.LE.(2*NNZERO)) + DO WHILE ((IAS(J).EQ.IAS(I)).AND. + + (J.LE.2*NNZERO)) + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JAS(I),AUX,IRET) + IF (IRET.EQ.0) CALL REORDVN(NZL,AS(I),IAS(I),JAS(I), + + AUX) + I = J + ENDDO + + NZR = NEL*2 - DIAGEL + PTR = 2*NNZERO-NZR+1 + + RETURN + + END + + + + diff --git a/test/Fileread/df_bench.f90 b/test/Fileread/df_bench.f90 new file mode 100644 index 00000000..f7b67ee5 --- /dev/null +++ b/test/Fileread/df_bench.f90 @@ -0,0 +1,735 @@ +program df_bench + use f90sparse + use mat_dist + use read_mat + use partgraph + use errormod + implicit none + + ! input parameters + character(len=20) :: cmethd, prec + character(len=40) :: mtrx_file, rhs_file + character(len=20) :: mtrx_name, name, ch_err + character(len=10) :: ptype + character(len=200) :: charbuf + integer :: inparms(20) + double precision ddot + external ddot + interface + ! .....user passed subroutine..... + subroutine part_block(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine part_block + end interface ! local variables + interface + ! .....user passed subroutine..... + subroutine part_blk2(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine part_blk2 + end interface ! Local variables + + + integer, parameter :: izero=0, ione=1 + character, parameter :: order='r' + real(kind(1.d0)), pointer, save :: b_col(:), x_col(:), r_col(:), & + & b_col_glob(:), x_col_glob(:), r_col_glob(:), b_glob(:,:), & + &z(:), q(:),z1(:), xm(:,:), ym(:,:) + integer :: iargc, check_descr, convert_descr + real(kind(1.d0)), parameter :: dzero = 0.d0, one = 1.d0 + real(kind(1.d0)) :: mpi_wtime, t1, t2, t3, tprec, tslv, ttot, & + &r_amax, b_amax,bb(1,1), lambda,scale,resmx,resmxp, omega + integer :: nrhs, nrow, nx1, nx2, n_row, n_col, dim,iread,ip,io,no,nmats,& + & imat,irenum, igsmth, matop, jacswp + logical :: amroot + external iargc, mpi_wtime + integer bsze,overlap, nn, nprecs, novrs + common/part/bsze,overlap + integer, pointer :: work(:), precs(:), ovrs(:) + ! sparse matrices + type(d_spmat) :: a, aux_a, h + type(d_prec) :: pre +!!$ type(d_precn) :: aprc + ! dense matrices + real(kind(1.d0)), pointer :: aux_b(:,:) , aux1(:), aux2(:), vdiag(:), & + & aux_g(:,:), aux_x(:,:), d(:) + + ! communications data structure + type(desc_type) :: desc_a, desc_a_out + + ! blacs parameters + integer :: nprow, npcol, ictxt, iam, np, myprow, mypcol + + ! solver paramters + integer :: iter, itmax, ierr, itrace, ircode, ipart,& + & methd, istopc, irst,nv + integer, pointer :: ivg(:), ipv(:) + character(len=5) :: afmt + real(kind(1.d0)) :: err, eps + integer iparm(20) + real(kind(1.d0)) rparm(20) + + ! other variables + integer :: i,info,j, ntryslv + integer :: internal, m,ii,nnzero, itryslv + integer, parameter :: ncsw=4, ntcs=4 + real(kind(1.d0)), pointer :: tsthal(:,:) + real(kind(1.d0)) :: tswmpi(ntcs,ncsw),tswsyn(ntcs,ncsw),tswsdrv(ntcs,ncsw) + ! common area + integer m_problem, nproc, nc + + + ! initialize blacs + call blacs_pinfo(iam, np) + call blacs_get(izero, izero, ictxt) + + ! rectangular Grid, Np x 1 + + call blacs_gridinit(ictxt, order, np, ione) + call blacs_gridinfo(ictxt, nprow, npcol, myprow, mypcol) + amroot = (myprow==0).and.(mypcol==0) + + + info=0 + name='df_bench' + call psb_set_errverbosity(2) + call psb_set_erraction(0) + ! call psb_erractionsave(err_act) + ! + ! Get parameters from file + ! + if (amroot) then + read(*,*) cmethd + do i = 1, len(cmethd) + inparms(i) = iachar(cmethd(i:i)) + end do + call igebs2d(ictxt,'all',' ',20,1,inparms,20) + + read(*,*) afmt + + do i = 1, len(afmt) + inparms(i) = iachar(afmt(i:i)) + end do + call igebs2d(ictxt,'all',' ',20,1,inparms,20) + read(*,*) ipart + read(*,*) itmax + read(*,*) itrace + read(*,*) istopc + read(*,*) irst + read(*,*) irenum + read(*,*) ntryslv + read(*,*) nprecs + inparms(1) = ipart + inparms(2) = itmax + inparms(3) = itrace + inparms(4) = irenum + inparms(5) = ntryslv + inparms(6) = nprecs + inparms(7) = istopc + inparms(8) = irst + call igebs2d(ictxt,'all',' ',8,1,inparms,20) +!!$ write(0,*) 'Sent nprecs: ',nprecs + allocate(precs(nprecs)) + read(*,*) (precs(i),i=1,nprecs) + call igebs2d(ictxt,'all',' ',nprecs,1,precs,nprecs) + read(*,*) novrs + call igebs2d(ictxt,'all',' ',1,1,novrs,1) +!!$ write(0,*) 'Sent novrs: ',novrs + allocate(ovrs(novrs)) + read(*,*) (ovrs(i),i=1,novrs) + call igebs2d(ictxt,'all',' ',novrs,1,ovrs,novrs) + read(*,*) eps + call dgebs2d(ictxt,'all',' ',1,1,eps,1) + read(*,*) omega + call dgebs2d(ictxt,'all',' ',1,1,omega,1) + read(*,*) igsmth + call igebs2d(ictxt,'all',' ',1,1,igsmth,1) + read(*,*) matop + call igebs2d(ictxt,'all',' ',1,1,matop,1) + read(*,*) jacswp + call igebs2d(ictxt,'all',' ',1,1,jacswp,1) + read(*,*) nmats + call igebs2d(ictxt,'all',' ',1,1,nmats,1) + + + else + call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) + do i = 1, len(cmethd) + cmethd(i:i) = achar(inparms(i)) + end do + call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) + do i = 1, len(afmt) + afmt(i:i) = achar(inparms(i)) + end do + + call igebr2d(ictxt,'all',' ',8,1,inparms,20,0,0) + ipart = inparms(1) + itmax = inparms(2) + itrace = inparms(3) + irenum = inparms(4) + ntryslv= inparms(5) + nprecs = inparms(6) + istopc = inparms(7) + irst = inparms(8) +!!$ write(0,*) 'Recvd nprecs: ',nprecs + allocate(precs(nprecs)) + call igebr2d(ictxt,'all',' ',nprecs,1,precs,nprecs,0,0) + call igebr2d(ictxt,'all',' ',1,1,novrs,1,0,0) +!!$ write(0,*) 'Recvd novrs: ',novrs + allocate(ovrs(novrs)) + call igebr2d(ictxt,'all',' ',novrs,1,ovrs,novrs,0,0) + call dgebr2d(ictxt,'all',' ',1,1,eps,1,0,0) + call dgebr2d(ictxt,'all',' ',1,1,omega,1,0,0) + call igebr2d(ictxt,'all',' ',1,1,igsmth,1,0,0) + call igebr2d(ictxt,'all',' ',1,1,matop,1,0,0) + call igebr2d(ictxt,'all',' ',1,1,jacswp,1,0,0) + call igebr2d(ictxt,'all',' ',1,1,nmats,1,0,0) + endif + + do imat=1,nmats + + if (amroot) then +!!$ read(*,*) mtrx_file,rhs_file + read(*,'(a)') charbuf + charbuf=adjustl(charbuf) + i=index(charbuf," ") + mtrx_file=charbuf(1:i-1) + rhs_file=adjustl(charbuf(i+1:)) + i=index(mtrx_file,"/",back=.true.) + mtrx_name=trim(mtrx_file(i+1:)) + write(0,*) 'Read mtx rhs : "',& + & mtrx_file,'" "',rhs_file,'" "',mtrx_name,'"' + endif + + call blacs_barrier(ictxt,'A') + t1 = mpi_wtime() + ! read the input matrix to be processed and (possibly) the RHS + nrhs = 1 + nproc = nprow + + if (amroot) then + nullify(aux_b) + call readmat(mtrx_file, aux_a, ictxt) +!!$ write(0,*) 'from readmat: ',aux_a%fida,aux_a%m,':',& +!!$ &aux_a%ia2(aux_a%m+1)-1,':',aux_a%ia1(1:10) + m_problem = aux_a%m + call igebs2d(ictxt,'a',' ',1,1,m_problem,1) + + if (rhs_file /= 'none') then + ! reading an rhs + call read_rhs(rhs_file,aux_b,ictxt) + end if + + if (associated(aux_b).and.size(aux_b,1)==m_problem) then + ! if any rhs were present, broadcast the first one +!!$ write(0,*) 'ok, got an rhs ',aux_b(m_problem,1) + b_col_glob =>aux_b(:,1) + else +!!$ write(0,*) 'inventing an rhs ' + allocate(aux_b(m_problem,1), stat=ircode) + if (ircode /= 0) then + write(0,*) 'memory allocation failure in df_sample' + call blacs_abort(ictxt,-1) + stop + endif + b_col_glob =>aux_b(:,1) + do i=1, m_problem + b_col_glob(i) = 1.d0 + enddo + endif + call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem) + else + call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0) +!!$ write(0,*) 'Receiving AUX_B' + allocate(aux_b(m_problem,1), stat=ircode) + if (ircode /= 0) then + write(0,*) 'Memory allocation failure in DF_SAMPLE' + call blacs_abort(ictxt,-1) + stop + endif + b_col_glob =>aux_b(:,1) + call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0) + end if + + ! switch over different partition types + if (ipart.eq.0) then + call blacs_barrier(ictxt,'A') + if (.true.) then + allocate(ivg(m_problem),ipv(np)) + do i=1,m_problem + call part_block(i,m_problem,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call matdist(aux_a, a, ivg, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + else + call matdist(aux_a, a, part_block, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + endif + else if (ipart.eq.1) then + call blacs_barrier(ictxt,'A') + call matdist(aux_a, a, part_blk2, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + else if (ipart.eq.2) then + if (amroot) then + call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np) + endif + call distr_grppart(0,0,ictxt) + if (.false.) then + call matdist(aux_a, a, part_graph, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + else + call getv_grppart(ivg) + call matdist(aux_a, a, ivg, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + endif + else + call matdist(aux_a, a, part_block, ictxt, & + & desc_a,b_col_glob,b_col,info,fmt=afmt) + end if + + if(info /= 0) then + info=4010 + ch_err='matdist' + goto 9999 + end if + + call f90_psdsall(m_problem,x_col,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_psdsall' + goto 9999 + end if + x_col(:) =0.0 + call f90_psdsasb(x_col,desc_a,info) + call f90_psdsasb(b_col,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_psdsasb' + goto 9999 + end if + + call f90_psdsall(m_problem,r_col,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_psdsall' + goto 9999 + end if + r_col(:) =0.0 + call f90_psdsasb(r_col,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_psdsasb' + goto 9999 + end if + t2 = mpi_wtime() - t1 + + + dim=size(a%aspk) + + call dgamx2d(ictxt, 'a', ' ', ione, ione, t2, ione,& + & t1, t1, -1, -1, -1) + + if (amroot) then + write(6,*) 'time to Read and Partition Matrix : ',T2 + END IF +!!$ call blacs_barrier(ictxt,'all') +!!$ do i=0, nprow-1 +!!$ if (myprow==i) then +!!$ write(6,*) 'Main descriptor for process ',i,' ',mtrx_file +!!$ call descprt(6,desc_a,short=.true.) +!!$ endif +!!$ call blacs_barrier(ictxt,'all') +!!$ enddo + + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + + do ip=1,nprecs + + pre%prec=precs(ip) + if (precs(ip) > 2) then + no=novrs + else + no=1 + endif + + do io=1, no + + ttot = 1.d300 + + do itryslv=1,ntryslv + ! Zero initial guess. + select case(precs(ip)) + case(noprec_) + ptype='noprec' + call psb_precset(pre,ptype) + case(diagsc_) + ptype='diagsc' + call psb_precset(pre,ptype) + case(bja_) + ptype='bja' + call psb_precset(pre,ptype) + case(asm_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,sum_/)) + case(ash_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),nohalo_,sum_/)) + case(ras_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + case(50+ras_) + pre%prec = ras_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_,f_slu_/)) + case(rash_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),nohalo_,none_/)) + case(ras2lv_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + call psb_precset(pre,ptype,& + &iv=(/add_ml_prec_,glb_aggr_,pre_smooth_,igsmth,matop/),rs=0.d0) + case(ras2lvm_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,glb_aggr_,pre_smooth_,igsmth,matop/),rs=0.d0) + case(lv2mras_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/),rs=0.d0) + case(50+lv2mras_) + pre%prec = lv2mras_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/),& + & rs=0.d0) + case(lv2smth_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,'ml',& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/),& + & rs=omega) + else + call psb_precset(pre,'ml',& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop/)) + endif + case(50+lv2smth_) + pre%prec = lv2smth_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/),& + & rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,glb_aggr_,post_smooth_,igsmth,matop,f_slu_/)) + endif + case(lv2lsm_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/),& + & rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/)) + endif + case(50+lv2lsm_) + pre%prec = lv2lsm_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop,f_slu_/),& + & rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/mult_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop,f_slu_/)) + endif + case(sl2sm_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,sum_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/add_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/),rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/add_ml_prec_,loc_aggr_,post_smooth_,igsmth,matop/)) + endif + case(new_loc_smth_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,& + & matop,f_ilu_n_,jacswp/), rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,& + & matop,f_ilu_n_,jacswp/)) + endif + case(50+new_loc_smth_) + pre%prec = new_loc_smth_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,& + & matop,f_slu_,jacswp/), rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_loc_aggr_,post_smooth_,1,& + & matop,f_slu_,jacswp/)) + endif + case(new_glb_smth_) + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,& + & matop,f_ilu_n_,1/), rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,& + & matop,f_ilu_n_,1/)) + endif + case(50+new_glb_smth_) + pre%prec = new_glb_smth_ + ptype='asm' + call psb_precset(pre,ptype,iv=(/ovrs(io),halo_,none_/)) + ptype='ml' + if (omega>0.0d0) then + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,& + & matop,f_slu_,1/), rs=omega) + else + call psb_precset(pre,ptype,& + & iv=(/new_ml_prec_,new_glb_aggr_,post_smooth_,1,& + & matop,f_slu_,1/)) + endif + case default + write(0,*) 'Unknown iprec, defaulting to BJA' + ptype='bja' + call psb_precset(pre,ptype) + end select + + call blacs_barrier(ictxt,'All') + call f90_psaxpby(0.d0,b_col,0.d0,x_col,desc_a,info) + if(info /= 0) then + info=4010 + ch_err='f90_psaxpby' + goto 9999 + end if + + + call blacs_barrier(ictxt,'All') + t1 = mpi_wtime() + call psb_precbld(a,pre,desc_a,info)!,'f') + t2 = mpi_wtime()-t1 + + if(info /= 0) then + info=4010 + ch_err='psb_precbld' + goto 9999 + end if + + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + + if (info /= 0) then + write(0,*) 'error in preconditioner :',info + call blacs_abort(ictxt,-1) + stop + end if + + iparm = 0 + rparm = 0.d0 + + call blacs_barrier(ictxt,'all') + t1 = mpi_wtime() + if (cmethd.eq.'CG') Then + call f90_cg(a,pre,b_col,x_col,eps,desc_a,info,& + & itmax,iter,err,itrace,istop=istopc) + if(info /= 0) then + info=4010 + ch_err='f90_cg' + goto 9999 + end if + else if (cmethd.eq.'BICGSTAB') Then + call f90_bicgstab(a,pre,b_col,x_col,eps,desc_a,info,& + & itmax,iter,err,itrace,istop=istopc) + if(info /= 0) then + info=4010 + ch_err='f90_bicgstab' + goto 9999 + end if + ELSE IF (CMETHD.EQ.'BICG') Then + call f90_bicg(a,pre,b_col,x_col,eps,desc_a,info,& + & itmax,iter,err,itrace,istop=istopc) + if(info /= 0) then + info=4010 + ch_err='f90_bicg' + goto 9999 + end if + ELSE IF (CMETHD.EQ.'CGS') Then + call f90_cgs(a,pre,b_col,x_col,eps,desc_a,info,& + & itmax,iter,err,itrace,istop=istopc) + if(info /= 0) then + info=4010 + ch_err='f90_cg' + goto 9999 + end if + ELSE IF (CMETHD.EQ.'GMRES') Then + call f90_rgmres(a,pre,b_col,x_col,eps,desc_a,info,& + & itmax,iter,err,itrace,irst=irst,istop=istopc) + if(info /= 0) then + info=4010 + ch_err='f90_rgmres' + goto 9999 + end if + else + write(*,*) 'Unknown method : "',cmethd,'"' + ENDIF + call blacs_barrier(ictxt,'all') + t3 = mpi_wtime() - t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t3,ione,t1,t1,-1,-1,-1) + + if (amroot) then + write(10,'(a18,3(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,nprow,& + & precs(ip),pre%baseprec%iprcparm(n_ovr_),iter,t2,t3,t2+t3 + endif + if (itryslv < ntryslv) then + call psb_precfree(pre,info) + if(info /= 0) then + info=4010 + ch_err='psb_precfree' + goto 9999 + end if + else + if (amroot) call prec_descr(6,pre) + end if + if ((t2+t3)aux_b(:,1) + do i=1, m_problem + b_col_glob(i) = 1.d0 + enddo + + call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem) + else + call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0) + + allocate(aux_b(m_problem,1), stat=ircode) + if (ircode /= 0) then + write(0,*) 'Memory allocation failure in DF_SAMPLE' + call blacs_abort(ictxt,-1) + stop + endif + b_col_glob =>aux_b(:,1) + call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0) + end if + + + ! switch over different partition types + if (ipart.eq.0) then + call blacs_barrier(ictxt,'A') + write(0,*) 'Partition type: BLOCK' + allocate(ivg(m_problem)) + call bld_partblock(m_problem,np,ivg) + call matdist(aux_a, a, ivg, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else if (ipart.eq.1) then + call blacs_barrier(ictxt,'A') + write(0,*) 'partition type: BLK2' + allocate(ivg(m_problem)) + call bld_partblk2(m_problem,np,ivg) + call matdist(aux_a, a, ivg, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else if (ipart.eq.2) then + write(0,*) 'partition type: GRAPH' + if (amroot) then + call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np) + endif + call distr_grppart(0,0,ictxt) + if (.false.) then + call matdist(aux_a, a, part_graph, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else + call getv_grppart(ivg) + call matdist(aux_a, a, ivg, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + endif + end if + + if(amroot) then + allocate(comm_info(np,np)) + comm_info(:,:)=0 + end if + call blacs_barrier(ictxt,'all') + write(0,'("Getting communication info")') + call get_comminfo(ictxt,desc_a,comm_info) + if(amroot) then + open(2,file=out_file,action='write',position='append') + write(2,'("Exchange table:")') + do i=1,np + write(2,*)'Row ',i,' : ',comm_info(i,:) + end do + write(2,'(" ")') + write(2,'(" ")') + close(2) + end if + + + n_row = desc_a%matrix_data(n_row_) + n_col = desc_a%matrix_data(n_col_) + write(0,'("Allocating vectors")') + call f90_psdsall(m_problem,ncsw,tsthal,ierrv,desc_a) + forall (i=1:n_row) + forall (j=1:ncsw) + tsthal(i,j) = j * desc_a%loc_to_glob(i) + end forall + end forall + tsthal(n_row+1:,:) = -1.d0 + + tswmpi = 1.d200 + tswsyn = 1.d200 + tswsdrv = 1.d200 + + write(0,'("Cycling")') + do nc=1, ncsw + do i=1, ntcs + tsthal(n_row+1:,:) = -1.d0 + t1=mpi_wtime() + call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=SWAP_MPI) + t2=mpi_wtime()-t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + tswmpi(i,nc) = t2 + !! Add correctness check + tsthal(n_row+1:,:) = -1.d0 + t1=mpi_wtime() + call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=SWAP_SYNC) + t2=mpi_wtime()-t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + tswsyn(i,nc) = t2 + !! Add correctness check + tsthal(n_row+1:,:) = -1.d0 + t1=mpi_wtime() + call f90_pshalo(tsthal(:,1:nc),desc_a,trans='T',mode=IOR(SWAP_SEND,SWAP_RECV)) + t2=mpi_wtime()-t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + tswsdrv(i,nc) = t2 + !! Add correctness check + end do + end do + + if (amroot) then + open(2,file=out_file,action='write',position='append') + do nc=1, ncsw + write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'MPI',& + & nprow,nc,minval(tswmpi(:,nc)),maxval(tswmpi(:,nc)),& + & sum(tswmpi(:,nc))/ntcs + write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'MPI',& + & nprow,nc,minval(tswmpi(:,nc)),maxval(tswmpi(:,nc)),& + & sum(tswmpi(:,nc))/ntcs + write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SYNC',& + & nprow,nc,minval(tswsyn(:,nc)),maxval(tswsyn(:,nc)),& + & sum(tswsyn(:,nc))/ntcs + write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SYNC',& + & nprow,nc,minval(tswsyn(:,nc)),maxval(tswsyn(:,nc)),& + & sum(tswsyn(:,nc))/ntcs + write(*,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SDRV',& + & nprow,nc,minval(tswsdrv(:,nc)),maxval(tswsdrv(:,nc)),& + & sum(tswsdrv(:,nc))/ntcs + write(2,'(a18,1x,a4,1(1x,i2),1x,i5,5(1x,g9.4))') mtrx_name,'SDRV',& + & nprow,nc,minval(tswsdrv(:,nc)),maxval(tswsdrv(:,nc)),& + & sum(tswsdrv(:,nc))/ntcs + end do + close(2) + end if + call f90_psdsfree(tsthal, desc_a) + call f90_psdsfree(b_col, desc_a) + call f90_psspfree(a, desc_a) + call f90_psdscfree(desc_a,info) + + end do + deallocate(ovrs,precs,ierrv, stat=info) + write(0,*) 'Info from deallocate ',info + call blacs_gridexit(ictxt) + call blacs_exit(0) + +end program df_comm + + + + diff --git a/test/Fileread/df_sample.f90 b/test/Fileread/df_sample.f90 new file mode 100644 index 00000000..4d561883 --- /dev/null +++ b/test/Fileread/df_sample.f90 @@ -0,0 +1,320 @@ +PROGRAM DF_SAMPLE + USE F90SPARSE + USE MAT_DIST + USE READ_MAT + USE PARTGRAPH + USE GETP + IMPLICIT NONE + + ! Input parameters + CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE + CHARACTER*80 :: CHARBUF + + DOUBLE PRECISION DDOT + EXTERNAL DDOT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PART_BLOCK + END INTERFACE ! Local variables + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PART_BLK2(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PART_BLK2 + END INTERFACE ! Local variables + + + INTEGER, PARAMETER :: IZERO=0, IONE=1 + CHARACTER, PARAMETER :: ORDER='R' + REAL(KIND(1.D0)), POINTER, SAVE :: B_COL(:), X_COL(:), R_COL(:), & + & B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), & + &Z(:), Q(:),Z1(:) + INTEGER :: IARGC, CHECK_DESCR, CONVERT_DESCR + Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),& + &lambda,scale,resmx,resmxp + integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread + logical :: amroot + External IARGC, MPI_WTIME + integer bsze,overlap + common/part/bsze,overlap + INTEGER, POINTER :: WORK(:) + ! Sparse Matrices + TYPE(D_SPMAT) :: A, AUX_A, H + TYPE(D_PREC) :: PRE +!!$ TYPE(D_PRECN) :: APRC + ! Dense Matrices + REAL(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:), & + & AUX_G(:,:), AUX_X(:,:), D(:) + + ! Communications data structure + TYPE(desc_type) :: DESC_A, DESC_A_OUT + + ! BLACS parameters + INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL + + ! Solver paramters + INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,& + & METHD, ISTOPC, ML, iprec, novr + integer, pointer :: ierrv(:) + character(len=5) :: afmt + REAL(KIND(1.D0)) :: ERR, EPS + integer iparm(20) + real(kind(1.d0)) rparm(20) + + ! Other variables + INTEGER :: I,INFO,J + INTEGER :: INTERNAL, M,II,NNZERO + + ! common area + INTEGER M_PROBLEM, NPROC + + + allocate(ierrv(6)) + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICTXT) + + ! Rectangular Grid, Np x 1 + + CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL) + AMROOT = (MYPROW==0).AND.(MYPCOL==0) + + ! + ! Get parameters + ! + CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,& + & IPART,AFMT,ISTOPC,ITMAX,ITRACE,novr,iprec,EPS) + + CALL BLACS_BARRIER(ICTXT,'A') + T1 = MPI_WTIME() + ! Read the input matrix to be processed and (possibly) the RHS + NRHS = 1 + NPROC = NPROW + + IF (AMROOT) THEN + NULLIFY(AUX_B) + CALL READMAT(MTRX_FILE, AUX_A, ICTXT) + + M_PROBLEM = AUX_A%M + CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1) + + IF(RHS_FILE /= 'NONE') THEN + ! Reading an RHS + CALL READ_RHS(RHS_FILE,AUX_B,ICTXT) + END IF + + IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN + ! If any RHS were present, broadcast the first one + write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1) + B_COL_GLOB =>AUX_B(:,1) + ELSE + write(0,*) 'Inventing an RHS ' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in DF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + DO I=1, M_PROBLEM +!!$ B_COL_GLOB(I) = REAL(I)*2.0/REAL(M_PROBLEM) + B_COL_GLOB(I) = 1.D0 + ENDDO + ENDIF + CALL DGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM) + ELSE + CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0) + WRITE(0,*) 'Receiving AUX_B' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in DF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + CALL DGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0) + END IF + + ! Switch over different partition types + IF (IPART.EQ.0) THEN + CALL BLACS_BARRIER(ICTXT,'A') + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,fmt=afmt) + ELSE IF (IPART.EQ.1) THEN + CALL BLACS_BARRIER(ICTXT,'A') + WRITE(6,*) 'Partition type: BLK2' + CALL MATDIST(AUX_A, A, PART_BLK2, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,fmt=afmt) + ELSE IF (IPART.EQ.2) THEN + WRITE(0,*) 'Partition type: GRAPH' + IF (AMROOT) THEN +!!$ WRITE(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np + WRITE(0,*) 'Build type: GRAPH ',aux_a%fida,& + &aux_a%m + CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP) + ENDIF + CALL BLACS_BARRIER(ICTXT,'A') +!!$ WRITE(0,*) myprow,'Done BUILD_GRPPART' + CALL DISTR_GRPPART(0,0,ICTXT) +!!$ WRITE(0,*) myprow,'Done DISTR_GRPPART' + CALL MATDIST(AUX_A, A, PART_GRAPH, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + ELSE + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,fmt=afmt) + END IF + + CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A) + X_COL(:) =0.0 + CALL F90_PSDSASB(X_COL,IERRV,DESC_A) + CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A) + R_COL(:) =0.0 + CALL F90_PSDSASB(R_COL,IERRV,DESC_A) + T2 = MPI_WTIME() - T1 + + + CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,& + & T1, T1, -1, -1, -1) + + IF (AMROOT) THEN + WRITE(6,*) 'Time to Read and Partition Matrix : ',T2 + END IF + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + + IF (AMROOT) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',iprec + + ! Zero initial guess. + select case(iprec) + case(noprec_) + call psb_precset(pre,'noprec') + case(diagsc_) + call psb_precset(pre,'diagsc') + case(ilu_) + call psb_precset(pre,'ilu') + case(asm_) + call psb_precset(pre,'asm',iv=(/novr,halo_,sum_/)) + case(ash_) + call psb_precset(pre,'asm',iv=(/novr,nohalo_,sum_/)) + case(ras_) + call psb_precset(pre,'asm',iv=(/novr,halo_,none_/)) + case(rash_) + call psb_precset(pre,'asm',iv=(/novr,nohalo_,none_/)) + end select + + + T1 = MPI_WTIME() + + CALL psb_precbld(A,PRE,DESC_A,INFO)!,'F') + TPREC = MPI_WTIME()-T1 + + + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1) + + WRITE(0,*) 'Preconditioner Time :',TPREC,' ',& + &prec,pre%prec + IF (INFO /= 0) THEN + WRITE(0,*) 'Error in preconditioner :',INFO + CALL BLACS_ABORT(ICTXT,-1) + STOP + END IF + + IPARM = 0 + RPARM = 0.D0 + CALL BLACS_BARRIER(ICTXT,'All') + T1 = MPI_WTIME() + IF (CMETHD.EQ.'BICGSTAB') Then + CALL F90_BICGSTAB(A,PRE,B_COL,X_COL,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE,istop=istopc) +!!$ ELSE IF (CMETHD.EQ.'BICG') Then +!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') Then +!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then +!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ENDIF + CALL BLACS_BARRIER(ICTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A) + call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a) + call f90_nrm2(resmx,r_col,desc_a) +!!$ where (b_col /= 0.d0) +!!$ r_col = r_col/b_col +!!$ end where + call f90_amax(resmxp,r_col,desc_a) + +!!$ ITER=IPARM(5) +!!$ ERR = RPARM(2) + if (amroot) then + call prec_descr(6,pre) + call csprt(60+myprow,a) +!!$ write(6,*) 'Number of iterations : ',iter +!!$ WRITE(6,*) 'Error on exit : ',ERR + write(6,*) 'Matrix: ',mtrx_file + write(6,*) 'Computed solution on ',NPROW,' processors.' + write(6,*) 'Iterations to convergence: ',iter + write(6,*) 'Error indicator on exit:',err + write(6,*) 'Time to Buil Prec. : ',TPREC + write(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/(ITER) + write(6,*) 'Total Time : ',T2+TPREC + write(6,*) 'Residual norm 2 = ',resmx + write(6,*) 'Residual norm inf = ',resmxp + END IF + + allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr) + if (ierr.ne.0) then + write(0,*) 'Allocation error: no data collection' + else + call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0) + call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0) + if (amroot) then + write(0,*) 'Saving X on file' + write(20,*) 'Matrix: ',mtrx_file + write(20,*) 'Computed solution on ',NPROW,' processors.' + write(20,*) 'Iterations to convergence: ',iter + write(20,*) 'Error indicator (infinity norm) on exit:', & + & ' ||r||/(||A||||x||+||b||) = ',err + write(20,*) 'Max residual = ',resmx, resmxp + do i=1,m_problem + write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i) + enddo + end if + end if +998 format(i8,4(2x,g20.14)) +993 format(i6,4(1x,e12.6)) + + + CALL F90_PSDSFREE(B_COL, DESC_A) + CALL F90_PSDSFREE(X_COL, DESC_A) + CALL F90_PSSPFREE(A, DESC_A) + CALL psb_precfree(PRE,info) + CALL F90_PSDSCFREE(DESC_A,info) + CALL BLACS_GRIDEXIT(ICTXT) + CALL BLACS_EXIT(0) + +END PROGRAM DF_SAMPLE + + + + + diff --git a/test/Fileread/df_samplelog.f90 b/test/Fileread/df_samplelog.f90 new file mode 100644 index 00000000..d37e7fc0 --- /dev/null +++ b/test/Fileread/df_samplelog.f90 @@ -0,0 +1,350 @@ +PROGRAM DF_SAMPLE + USE F90SPARSE + USE MAT_DIST + USE READ_MAT + USE PARTGRAPH + USE GETP + use mpi + IMPLICIT NONE + + ! Input parameters + CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE + CHARACTER*80 :: CHARBUF + + DOUBLE PRECISION DDOT + EXTERNAL DDOT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PART_BLOCK + END INTERFACE ! Local variables + + + INTEGER, PARAMETER :: IZERO=0, IONE=1 + CHARACTER, PARAMETER :: ORDER='R' + REAL(KIND(1.D0)), POINTER, SAVE :: B_COL(:), X_COL(:), R_COL(:), & + & B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), & + &Z(:), Q(:),Z1(:) + INTEGER :: IARGC, CHECK_DESCR, CONVERT_DESCR + Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0 + Real(Kind(1.d0)) :: T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),& + &lambda,scale,resmx,resmxp + integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread + logical :: amroot + integer mpe_log_get_event_number,mpe_Describe_state,mpe_log_event + + External IARGC + integer bsze,overlap + common/part/bsze,overlap + INTEGER, POINTER :: WORK(:) + ! Sparse Matrices + TYPE(D_SPMAT) :: A, AUX_A, H + TYPE(D_PREC) :: PRE +!!$ TYPE(D_PRECN) :: APRC + ! Dense Matrices + REAL(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:), & + & AUX_G(:,:), AUX_X(:,:), D(:) + + ! Communications data structure + TYPE(desc_type) :: DESC_A, DESC_A_OUT + + ! BLACS parameters + INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL + + ! Solver paramters + INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,& + & METHD, ISTOPC, ML + integer, pointer :: ierrv(:) + REAL(KIND(1.D0)) :: ERR, EPS + integer iparm(20) + real(kind(1.d0)) rparm(20) + + ! Other variables + INTEGER :: I,INFO,J, iprecb,iprece,islvb,islve + INTEGER :: INTERNAL, M,II,NNZERO + + ! common area + INTEGER M_PROBLEM, NPROC + + + allocate(ierrv(6)) + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICTXT) + iprecb = mpe_log_get_event_number() + iprece = mpe_log_get_event_number() + islvb = mpe_log_get_event_number() + islve = mpe_log_get_event_number() + if (iam==0) then + info = mpe_describe_state(iprecb,iprece,"Preconditioner","OrangeRed") + info = mpe_describe_state(islvb,islve,"Solver","DarkGreen") + endif + + ! Rectangular Grid, Np x 1 + + CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL) + AMROOT = (MYPROW==0).AND.(MYPCOL==0) + + ! + ! Get parameters + ! + CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,& + & IPART,ISTOPC,ITMAX,ITRACE,ML,PRE%PREC,EPS) + + CALL BLACS_BARRIER(ICTXT,'A') + T1 = MPI_WTIME() + ! Read the input matrix to be processed and (possibly) the RHS + NRHS = 1 + NPROC = NPROW + + IF (AMROOT) THEN + NULLIFY(AUX_B) + CALL READMAT(MTRX_FILE, AUX_A, ICTXT) + + M_PROBLEM = AUX_A%M + CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1) + + IF(RHS_FILE /= 'NONE') THEN + ! Reading an RHS + CALL READ_RHS(RHS_FILE,AUX_B,ICTXT) + END IF + + IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN + ! If any RHS were present, broadcast the first one + write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1) + B_COL_GLOB =>AUX_B(:,1) + ELSE + write(0,*) 'Inventing an RHS ' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in DF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + DO I=1, M_PROBLEM + B_COL_GLOB(I) = REAL(I)*2.0/REAL(M_PROBLEM) + ENDDO + ENDIF + CALL DGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM) + ELSE + CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0) + WRITE(0,*) 'Receiving AUX_B' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in DF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + CALL DGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0) + END IF + + ! Switch over different partition types + IF (IPART.EQ.0) THEN + CALL BLACS_BARRIER(ICTXT,'A') + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + ELSE IF (IPART.EQ.2) THEN + WRITE(0,*) 'Partition type: GRAPH' + IF (AMROOT) THEN +!!$ WRITE(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np + CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP) + ENDIF + CALL BLACS_BARRIER(ICTXT,'A') +!!$ WRITE(0,*) myprow,'Done BUILD_GRPPART' + CALL DISTR_GRPPART(0,0,ICTXT) +!!$ WRITE(0,*) myprow,'Done DISTR_GRPPART' + CALL MATDIST(AUX_A, A, PART_GRAPH, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + ELSE + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + END IF + + CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A) + X_COL(:) =0.0 + CALL F90_PSDSASB(X_COL,IERRV,DESC_A) + CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A) + R_COL(:) =0.0 + CALL F90_PSDSASB(R_COL,IERRV,DESC_A) + T2 = MPI_WTIME() - T1 + + + DIM=SIZE(A%ASPK) + +!!$ ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),& +!!$ & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),& +!!$ & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),& +!!$ & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),& +!!$ & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),& +!!$ & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),& +!!$ & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),& +!!$ & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(dim)) +!!$ check_descr=15 +! work(5)=9 +!!$ WRITE(0,*)'CALLING VERIFY' +!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,& +!!$ & DESC_A_OUT,WORK) +!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR + +! deallocate(work) + + + CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,& + & T1, T1, -1, -1, -1) + + IF (AMROOT) THEN + WRITE(6,*) 'Time to Read and Partition Matrix : ',T2 + END IF + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + + IF (AMROOT) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',PRE%PREC + + +!!$ do i=1,a%m +!!$ do j=a%ia2(i),a%ia2(i+1)-1 +!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j) +!!$ end do +!!$ end do +!!$ +!!$ write(0,*)'halo_index',desc_a%halo_index(:) +!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:) +!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:) + + info = MPE_Log_event( iprecb, 0, "start Precond" ) + T1 = MPI_WTIME() + + CALL PRECONDITIONER(A,PRE,DESC_A,INFO)!,'F') + TPREC = MPI_WTIME()-T1 + info = MPE_Log_event( iprece, 0, "end Precond" ) + + + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1) + + WRITE(0,*) 'Preconditioner Time :',TPREC,' ',& + &prec,pre%prec + IF (INFO /= 0) THEN + WRITE(0,*) 'Error in preconditioner :',INFO + CALL BLACS_ABORT(ICTXT,-1) + STOP + END IF + + IPARM = 0 + RPARM = 0.D0 + CALL BLACS_BARRIER(ICTXT,'All') + info = MPE_Log_event( islvb, 0, "start Solver" ) + T1 = MPI_WTIME() + IF (CMETHD.EQ.'BICGSTAB') Then + CALL F90_BICGSTAB(A,PRE,B_COL,X_COL,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') Then +!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') Then +!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then +!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ENDIF + T2 = MPI_WTIME() - T1 + info = MPE_Log_event( islve, 0, "end Solver" ) + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A) + call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a) + call f90_amax(resmx,r_col,desc_a) + where (b_col /= 0.d0) + r_col = r_col/b_col + end where + call f90_amax(resmxp,r_col,desc_a) + +!!$ ITER=IPARM(5) +!!$ ERR = RPARM(2) + if (amroot) then + write(6,*) 'methd iprec istopc : ',pre%prec, istopc + write(6,*) 'Number of iterations : ',iter + write(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/(ITER) + WRITE(6,*) 'Error on exit : ',ERR + END IF + + allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr) + if (ierr.ne.0) then + write(0,*) 'Allocation error: no data collection' + else + call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0) + call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0) + if (amroot) then + write(0,*) 'Saving X on file' + write(20,*) 'Matrix: ',mtrx_file + write(20,*) 'Computed solution on ',NPROW,' processors.' + write(20,*) 'Iterations to convergence: ',iter + write(20,*) 'Error indicator (infinity norm) on exit:', & + & ' ||r||/(||A||||x||+||b||) = ',err + write(20,*) 'Max residual = ',resmx, resmxp + do i=1,m_problem + write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i) + enddo + end if + end if +998 format(i8,4(2x,g20.14)) +993 format(i6,4(1x,e12.6)) + + +!!$ ! +!!$ ! Raleygh quotients for first eigenvalue +!!$ ! +!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A) +!!$ scale = f90_psnrm2(x_col,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ do i=1, itmax +!!$ scale = f90_psnrm2(z,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(one,z,dzero,z1,desc_a) +!!$ call f90_psaxpby(scale,z,dzero,q,desc_a) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ lambda = f90_psdot(q,z,desc_A) +!!$ scale = f90_psnrm2(z,desc_A) +!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale +!!$ enddo +!!$ call f90_psaxpby(-one,z,one,z1,desc_a) +!!$ scale = f90_psnrm2(z1,desc_A) +!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale +!!$ do i=1, desc_a%matrix_data(n_row_) +!!$ scale=z(i)/q(i) +!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda) +!!$ enddo + + CALL F90_PSDSFREE(B_COL, DESC_A) + CALL F90_PSDSFREE(X_COL, DESC_A) + CALL F90_PSSPFREE(A, DESC_A) + CALL F90_PSPRECFREE(PRE,info) + CALL F90_PSDSCFREE(DESC_A,info) + CALL BLACS_GRIDEXIT(ICTXT) + CALL BLACS_EXIT(0) + +END PROGRAM DF_SAMPLE + + + + + diff --git a/test/Fileread/df_samplem.f90 b/test/Fileread/df_samplem.f90 new file mode 100644 index 00000000..3ded29e1 --- /dev/null +++ b/test/Fileread/df_samplem.f90 @@ -0,0 +1,419 @@ +program df_samplem + use f90sparse + use mat_dist + use read_mat + use partgraph + implicit none + + ! input parameters + character*20 :: cmethd, prec, mtrx_file, rhs_file + character*80 :: charbuf + integer :: inparms(20) + double precision ddot + external ddot + interface + ! .....user passed subroutine..... + subroutine part_block(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine part_block + end interface ! local variables + interface + ! .....user passed subroutine..... + subroutine part_blk2(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine part_blk2 + end interface ! Local variables + + + integer, parameter :: izero=0, ione=1 + character, parameter :: order='r' + real(kind(1.d0)), pointer, save :: b_col(:), x_col(:), r_col(:), & + & b_col_glob(:), x_col_glob(:), r_col_glob(:), b_glob(:,:), & + &z(:), q(:),z1(:), xm(:,:), ym(:,:) + integer :: iargc, check_descr, convert_descr + real(kind(1.d0)), parameter :: dzero = 0.d0, one = 1.d0 + real(kind(1.d0)) :: mpi_wtime, t1, t2, tprec, r_amax, b_amax,bb(1,1),& + &lambda,scale,resmx,resmxp + integer :: nrhs, nrow, nx1, nx2, n_row, dim,iread,ip,io,no,nmats,imat,irenum + logical :: amroot + external iargc, mpi_wtime + integer bsze,overlap, nn, nprecs, novrs + common/part/bsze,overlap + integer, pointer :: work(:), precs(:), ovrs(:) + ! sparse matrices + type(d_spmat) :: a, aux_a, h + type(d_prec) :: pre +!!$ type(d_precn) :: aprc + ! dense matrices + real(kind(1.d0)), pointer :: aux_b(:,:) , aux1(:), aux2(:), vdiag(:), & + & aux_g(:,:), aux_x(:,:), d(:) + + ! communications data structure + type(desc_type) :: desc_a, desc_a_out + + ! blacs parameters + integer :: nprow, npcol, ictxt, iam, np, myprow, mypcol + + ! solver paramters + integer :: iter, itmax, ierr, itrace, ircode, ipart,& + & methd, istopc, ml + integer, pointer :: ierrv(:) + character(len=5) :: afmt + real(kind(1.d0)) :: err, eps + integer iparm(20) + real(kind(1.d0)) rparm(20) + + ! other variables + integer :: i,info,j + integer :: internal, m,ii,nnzero + + ! common area + integer m_problem, nproc + + + allocate(ierrv(6)) + ! initialize blacs + call blacs_pinfo(iam, np) + call blacs_get(izero, izero, ictxt) + + ! rectangular Grid, Np x 1 + + call blacs_gridinit(ictxt, order, np, ione) + call blacs_gridinfo(ictxt, nprow, npcol, myprow, mypcol) + amroot = (myprow==0).and.(mypcol==0) + + ! + ! Get parameters from file + ! + if (amroot) then + read(*,*) cmethd + do i = 1, len(cmethd) + inparms(i) = iachar(cmethd(i:i)) + end do + call igebs2d(ictxt,'all',' ',20,1,inparms,20) + + read(*,*) afmt + + do i = 1, len(afmt) + inparms(i) = iachar(afmt(i:i)) + end do + call igebs2d(ictxt,'all',' ',20,1,inparms,20) + read(*,*) ipart + read(*,*) itmax + read(*,*) itrace + read(*,*) istopc + read(*,*) irenum + read(*,*) nprecs + inparms(1) = ipart + inparms(2) = itmax + inparms(3) = itrace + inparms(4) = irenum + inparms(5) = nprecs + inparms(6) = istopc + call igebs2d(ictxt,'all',' ',6,1,inparms,20) +!!$ write(0,*) 'Sent nprecs: ',nprecs + allocate(precs(nprecs)) + read(*,*) (precs(i),i=1,nprecs) + call igebs2d(ictxt,'all',' ',nprecs,1,precs,nprecs) + read(*,*) novrs + call igebs2d(ictxt,'all',' ',1,1,novrs,1) +!!$ write(0,*) 'Sent novrs: ',novrs + allocate(ovrs(novrs)) + read(*,*) (ovrs(i),i=1,novrs) + call igebs2d(ictxt,'all',' ',novrs,1,ovrs,novrs) + read(*,*) eps + call dgebs2d(ictxt,'all',' ',1,1,eps,1) + read(*,*) nmats + call igebs2d(ictxt,'all',' ',1,1,nmats,1) + + + else + call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) + do i = 1, len(cmethd) + cmethd(i:i) = achar(inparms(i)) + end do + call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) + do i = 1, len(afmt) + afmt(i:i) = achar(inparms(i)) + end do + + call igebr2d(ictxt,'all',' ',6,1,inparms,20,0,0) + ipart = inparms(1) + itmax = inparms(2) + itrace = inparms(3) + irenum = inparms(4) + nprecs = inparms(5) + istopc = inparms(6) +!!$ write(0,*) 'Recvd nprecs: ',nprecs + allocate(precs(nprecs)) + call igebr2d(ictxt,'all',' ',nprecs,1,precs,nprecs,0,0) + call igebr2d(ictxt,'all',' ',1,1,novrs,1,0,0) +!!$ write(0,*) 'Recvd novrs: ',novrs + allocate(ovrs(novrs)) + call igebr2d(ictxt,'all',' ',novrs,1,ovrs,novrs,0,0) + call dgebr2d(ictxt,'all',' ',1,1,eps,1,0,0) + call igebr2d(ictxt,'all',' ',1,1,nmats,1,0,0) + endif + + + do imat=1,nmats + + if (amroot) then + read(*,*) mtrx_file, rhs_file +!!$ write(0,*) 'Read mtx rhs : "',mtrx_file,'" "',rhs_file,'"' +!!$ do i = 1, len(mtrx_file) +!!$ inparms(i) = iachar(mtrx_file(i:i)) +!!$ end do +!!$ ! broadcast parameters to all processors +!!$ call igebs2d(ictxt,'all',' ',20,1,inparms,20) +!!$ do i = 1, len(rhs_file) +!!$ inparms(i) = iachar(rhs_file(i:i)) +!!$ end do +!!$ ! broadcast parameters to all processors +!!$ call igebs2d(ictxt,'all',' ',20,1,inparms,20) +!!$ write(0,*) 'Sent mtx rhs : "',mtrx_file,'" "',rhs_file,'"' +!!$ else +!!$ call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) +!!$ do i = 1, len(mtrx_file) +!!$ mtrx_file(i:i) = achar(inparms(i)) +!!$ end do +!!$ call igebr2d(ictxt,'a',' ',20,1,inparms,20,0,0) +!!$ do i = 1, len(rhs_file) +!!$ rhs_file(i:i) = achar(inparms(i)) +!!$ end do +!!$ write(0,*) 'Recvd mtx rhs : "',mtrx_file,'" "',rhs_file,'"' + endif + + call blacs_barrier(ictxt,'A') + t1 = mpi_wtime() + ! read the input matrix to be processed and (possibly) the RHS + nrhs = 1 + nproc = nprow + + if (amroot) then + nullify(aux_b) + call readmat(mtrx_file, aux_a, ictxt) +!!$ write(0,*) 'from readmat: ',aux_a%fida,aux_a%m,':',& +!!$ &aux_a%ia2(aux_a%m+1)-1,':',aux_a%ia1(1:10) + m_problem = aux_a%m + call igebs2d(ictxt,'a',' ',1,1,m_problem,1) + + if (rhs_file /= 'none') then + ! reading an rhs + call read_rhs(rhs_file,aux_b,ictxt) + end if + + if (associated(aux_b).and.size(aux_b,1)==m_problem) then + ! if any rhs were present, broadcast the first one +!!$ write(0,*) 'ok, got an rhs ',aux_b(m_problem,1) + b_col_glob =>aux_b(:,1) + else +!!$ write(0,*) 'inventing an rhs ' + allocate(aux_b(m_problem,1), stat=ircode) + if (ircode /= 0) then + write(0,*) 'memory allocation failure in df_sample' + call blacs_abort(ictxt,-1) + stop + endif + b_col_glob =>aux_b(:,1) + do i=1, m_problem + b_col_glob(i) = 1.d0 + enddo + endif + call dgebs2d(ictxt,'a',' ',m_problem,1,b_col_glob,m_problem) + else + call igebr2d(ictxt,'a',' ',1,1,m_problem,1,0,0) +!!$ write(0,*) 'Receiving AUX_B' + allocate(aux_b(m_problem,1), stat=ircode) + if (ircode /= 0) then + write(0,*) 'Memory allocation failure in DF_SAMPLE' + call blacs_abort(ictxt,-1) + stop + endif + b_col_glob =>aux_b(:,1) + call dgebr2d(ictxt,'A',' ',m_problem,1,b_col_glob,m_problem,0,0) + end if + + ! switch over different partition types + if (ipart.eq.0) then + call blacs_barrier(ictxt,'A') +!!$ write(0,*) 'Partition type: BLOCK' + call matdist(aux_a, a, part_block, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else if (ipart.eq.1) then + call blacs_barrier(ictxt,'A') +!!$ write(0,*) 'partition type: BLK2' + call matdist(aux_a, a, part_blk2, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else if (ipart.eq.2) then +!!$ write(0,*) 'partition type: GRAPH' + if (amroot) then +!!$ write(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np +!!$ write(0,*) 'Build type: GRAPH ',aux_a%fida,& +!!$ &aux_a%m + call build_grppart(aux_a%m,aux_a%fida,aux_a%ia1,aux_a%ia2,np) + endif + + call distr_grppart(0,0,ictxt) + + call matdist(aux_a, a, part_graph, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + else +!!$ write(6,*) 'Partition type: BLOCK' + call matdist(aux_a, a, part_block, ictxt, & + & desc_a,b_col_glob,b_col,fmt=afmt) + end if + + call f90_psdsall(m_problem,x_col,ierrv,desc_a) + x_col(:) =0.0 + call f90_psdsasb(x_col,ierrv,desc_a) + call f90_psdsall(m_problem,r_col,ierrv,desc_a) + r_col(:) =0.0 + call f90_psdsasb(r_col,ierrv,desc_a) + t2 = mpi_wtime() - t1 + + + dim=size(a%aspk) + + + call dgamx2d(ictxt, 'a', ' ', ione, ione, t2, ione,& + & t1, t1, -1, -1, -1) + + if (amroot) then + write(6,*) 'time to Read and Partition Matrix : ',T2 + END IF + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + do ip=1,nprecs + pre%prec=precs(ip) + + + if (pre%prec>2) then + no=novrs + else + no=1 + endif + do io=1, no + if (pre%prec>2) then + pre%n_ovr=ovrs(io) + else + pre%n_ovr=0 + endif + pre%irenum = irenum +!!$ if (amroot) write(0,*) 'Preconditioner : ',& +!!$ &PRE%PREC,pre%n_ovr + +!!$ do i=1,a%m +!!$ do j=a%ia2(i),a%ia2(i+1)-1 +!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j) +!!$ end do +!!$ end do +!!$ +!!$ write(0,*)'halo_index',desc_a%halo_index(:) +!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:) +!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:) + + ! Zero initial guess. + call f90_psaxpby(0.d0,b_col,0.d0,x_col,desc_a) + call blacs_barrier(ictxt,'All') + t1 = mpi_wtime() + call preconditioner(a,pre,desc_a,info)!,'f') + tprec = mpi_wtime()-t1 + + write(0,*) myprow,' Preconditioner Time :',TPREC,' ',& + &pre%prec + + call DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1) + if (amroot) then + write(0,*) 'Preconditioner Time :',TPREC,' ',& + &pre%prec + endif + if (info /= 0) then + write(0,*) 'error in preconditioner :',info + call blacs_abort(ictxt,-1) + stop + end if + if (pre%prec>=ras2lv_) then + write(*,*) myprow, 'Aggregation checks: ',pre%na_f1,pre%nn_f1,pre%na_tot + if (amroot) write(*,*) 'Output local aggregates ',pre%nlaggr(:) + end if + iparm = 0 + rparm = 0.d0 + call blacs_barrier(ictxt,'all') + t1 = mpi_wtime() + if (cmethd.eq.'BICGSTAB') Then + call f90_bicgstab(a,pre,b_col,x_col,eps,desc_a,& + & itmax,iter,err,ierr,itrace,istop=istopc) +!!$ ELSE IF (CMETHD.EQ.'BICG') Then +!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') Then +!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then +!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + endif + call blacs_barrier(ictxt,'all') + t2 = mpi_wtime() - t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A) + call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a) + call f90_nrm2(resmx,r_col,desc_a) + call f90_amax(resmxp,r_col,desc_a) + + if (amroot) then + write(6,*) 'methd iprec istopc : ',pre%prec, pre%n_ovr, istopc +!!$ write(6,*) 'Number of iterations : ',iter +!!$ WRITE(6,*) 'Error on exit : ',ERR + write(6,*) 'Matrix: ',mtrx_file + write(6,*) 'Computed solution on ',NPROW,' processors.' + write(6,*) 'Iterations to convergence: ',iter + write(6,*) 'Error indicator on exit:',err + write(6,*) 'Time to Buil Prec. : ',TPREC + write(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/(ITER) + write(6,*) 'Total Time : ',T2+TPREC + write(6,*) 'Residual norm 2 = ',resmx + write(6,*) 'Residual norm inf = ',resmxp + write(6,*) + write(6,*) + + write(8,'(a18,3(1x,i2),1x,i5,5(1x,g9.4))') mtrx_file,nprow,pre%prec,pre%n_ovr,& + & iter,tprec,t2,t2+tprec,resmx,resmxp + END IF +!!$ write(0,*) 'Done matrix ',imat,ip,io + call blacs_barrier(ictxt,'all') + call f90_psprecfree(pre,info) +!!$ write(0,*) 'Done precfree' + call blacs_barrier(ictxt,'all') + end do + end do + deallocate(aux_b) + if (amroot) call spfree(aux_a,info) + call f90_psdsfree(b_col, desc_a) + call f90_psdsfree(x_col, desc_a) + call f90_psdsfree(r_col, desc_a) + call f90_psspfree(a, desc_a) + call f90_psdscfree(desc_a,info) + + end do + + call blacs_gridexit(ictxt) + call blacs_exit(0) + +end program df_samplem + + + + + diff --git a/test/Fileread/getp.f90 b/test/Fileread/getp.f90 new file mode 100644 index 00000000..db8a9fdd --- /dev/null +++ b/test/Fileread/getp.f90 @@ -0,0 +1,159 @@ +MODULE GETP + + PUBLIC GET_PARMS + PUBLIC PR_USAGE + +CONTAINS + ! + ! Get iteration parameters from the command line + ! + SUBROUTINE GET_PARMS(ICONTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,IPART,& + & AFMT,ISTOPC,ITMAX,ITRACE,ML,IPREC,EPS) + integer :: icontxt + Character*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE + Integer :: IRET, ISTOPC,ITMAX,ITRACE,IPART,IPREC,ML + Character*40 :: CHARBUF + real(kind(1.d0)) :: eps + character :: afmt*5 + INTEGER :: IARGC, NPROW, NPCOL, MYPROW, MYPCOL + EXTERNAL IARGC + INTEGER :: INPARMS(20), IP + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + IF (MYPROW==0) THEN + ! Read Input Parameters + READ(*,*) IP + IF (IP.GE.3) THEN + READ(*,*) MTRX_FILE + READ(*,*) RHS_FILE + READ(*,*) CMETHD + READ(*,*) PREC + READ(*,*) AFMT + + ! Convert strings in array + DO I = 1, LEN(MTRX_FILE) + INPARMS(I) = IACHAR(MTRX_FILE(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',20,1,INPARMS,20) + + ! Convert strings in array + DO I = 1, LEN(CMETHD) + INPARMS(I) = IACHAR(CMETHD(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',20,1,INPARMS,20) + + DO I = 1, LEN(PREC) + INPARMS(I) = IACHAR(PREC(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',20,1,INPARMS,20) + + DO I = 1, LEN(AFMT) + INPARMS(I) = IACHAR(AFMT(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',20,1,INPARMS,20) + + READ(*,*) IPART + IF (IP.GE.6) THEN + READ(*,*) ISTOPC + ELSE + ISTOPC=1 + ENDIF + IF (IP.GE.7) THEN + READ(*,*) ITMAX + ELSE + ITMAX=500 + ENDIF + IF (IP.GE.8) THEN + READ(*,*) ITRACE + ELSE + ITRACE=-1 + ENDIF + IF (IP.GE.9) THEN + READ(*,*) IPREC + ELSE + IPREC=0 + ENDIF + IF (IP.GE.10) THEN + READ(*,*) ML + ELSE + ML = 1 + ENDIF + IF (IP.GE.11) THEN + READ(*,*) EPS + ELSE + EPS=1.D-6 + ENDIF + ! Broadcast parameters to all processors + + INPARMS(1) = IPART + INPARMS(2) = ISTOPC + INPARMS(3) = ITMAX + INPARMS(4) = ITRACE + INPARMS(5) = IPREC + INPARMS(6) = ML + CALL IGEBS2D(ICONTXT,'ALL',' ',6,1,INPARMS,6) + CALL DGEBS2D(ICONTXT,'ALL',' ',1,1,EPS,1) + + WRITE(6,*)'Solving matrix: ',mtrx_file + WRITE(6,*)' with BLOCK data distribution, NP=',NPROW,& + & ' Preconditioner=',PREC + else + CALL PR_USAGE(0) + CALL BLACS_ABORT(ICONTXT,-1) + STOP 1 + END IF + ELSE + ! Receive Parameters + CALL IGEBR2D(ICONTXT,'A',' ',20,1,INPARMS,20,0,0) + DO I = 1, 20 + MTRX_FILE(I:I) = ACHAR(INPARMS(I)) + END DO + + CALL IGEBR2D(ICONTXT,'A',' ',20,1,INPARMS,20,0,0) + DO I = 1, 20 + CMETHD(I:I) = ACHAR(INPARMS(I)) + END DO + + CALL IGEBR2D(ICONTXT,'A',' ',20,1,INPARMS,20,0,0) + DO I = 1, 20 + PREC(I:I) = ACHAR(INPARMS(I)) + END DO + CALL IGEBR2D(ICONTXT,'A',' ',20,1,INPARMS,20,0,0) + DO I = 1, 20 + AFMT(I:I) = ACHAR(INPARMS(I)) + END DO + + CALL IGEBR2D(ICONTXT,'A',' ',6,1,INPARMS,6,0,0) + + IPART = INPARMS(1) + ISTOPC = INPARMS(2) + ITMAX = INPARMS(3) + ITRACE = INPARMS(4) + IPREC = INPARMS(5) + ML = INPARMS(6) + CALL DGEBR2D(ICONTXT,'A',' ',1,1,EPS,1,0,0) + END IF + + END SUBROUTINE GET_PARMS + SUBROUTINE PR_USAGE(IOUT) + INTEGER IOUT + WRITE(IOUT, *) ' Number of parameters is incorrect!' + WRITE(IOUT, *) ' Use: hb_sample mtrx_file methd prec [ptype & + &itmax istopc itrace]' + WRITE(IOUT, *) ' Where:' + WRITE(IOUT, *) ' mtrx_file is stored in HB format' + WRITE(IOUT, *) ' methd may be: CGSTAB ' + WRITE(IOUT, *) ' prec may be: ILU DIAGSC NONE' + WRITE(IOUT, *) ' ptype Partition strategy default 0' + WRITE(IOUT, *) ' 0: BLOCK partition ' + WRITE(IOUT, *) ' itmax Max iterations [500] ' + WRITE(IOUT, *) ' istopc Stopping criterion [1] ' + WRITE(IOUT, *) ' itrace 0 (no tracing, default) or ' + WRITE(IOUT, *) ' >= 0 do tracing every ITRACE' + WRITE(IOUT, *) ' iterations ' + END SUBROUTINE PR_USAGE +END MODULE GETP diff --git a/test/Fileread/lowerc.f b/test/Fileread/lowerc.f new file mode 100644 index 00000000..25df3eef --- /dev/null +++ b/test/Fileread/lowerc.f @@ -0,0 +1,19 @@ + subroutine lowerc(string,pos,len) +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c Convert uppercase letters to lowercase letters in string with +c starting postion pos and length len. +cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + integer pos, len + character*(*) string + + character*26 lcase, ucase + save lcase,ucase + data lcase/'abcdefghijklmnopqrstuvwxyz'/ + data ucase/'ABCDEFGHIJKLMNOPQRSTUVWXYZ'/ + + do i=pos,len + k = index(ucase,string(i:i)) + if (k.ne.0) string(i:i) = lcase(k:k) + enddo + return + end diff --git a/test/Fileread/mat_dist.f90 b/test/Fileread/mat_dist.f90 new file mode 100644 index 00000000..4ca3548b --- /dev/null +++ b/test/Fileread/mat_dist.f90 @@ -0,0 +1,1115 @@ +module mat_dist + + interface matdist + module procedure dmatdistf, zmatdistf, dmatdistv + end interface + +contains + + subroutine dmatdistf (a_glob, a, parts, icontxt, desc_a,& + & b_glob, b, info, inroot,fmt) + ! + ! an utility subroutine to distribute a matrix among processors + ! according to a user defined data distribution, using pessl + ! sparse matrix subroutines. + ! + ! type(d_spmat) :: a_glob + ! on entry: this contains the global sparse matrix as follows: + ! a%fida =='csr' + ! a%aspk for coefficient values + ! a%ia1 for column indices + ! a%ia2 for row pointers + ! a%m for number of global matrix rows + ! a%k for number of global matrix columns + ! on exit : undefined, with unassociated pointers. + ! + ! type(d_spmat) :: a + ! on entry: fresh variable. + ! on exit : this will contain the local sparse matrix. + ! + ! interface parts + ! ! .....user passed subroutine..... + ! subroutine parts(global_indx,n,np,pv,nv) + ! implicit none + ! integer, intent(in) :: global_indx, n, np + ! integer, intent(out) :: nv + ! integer, intent(out) :: pv(*) + ! + ! end subroutine parts + ! end interface + ! on entry: subroutine providing user defined data distribution. + ! for each global_indx the subroutine should return + ! the list pv of all processes owning the row with + ! that index; the list will contain nv entries. + ! usually nv=1; if nv >1 then we have an overlap in the data + ! distribution. + ! + ! integer :: icontxt + ! on entry: blacs context. + ! on exit : unchanged. + ! + ! type (desc_type) :: desc_a + ! on entry: fresh variable. + ! on exit : the updated array descriptor + ! + ! real(kind(1.d0)), pointer, optional :: b_glob(:) + ! on entry: this contains right hand side. + ! on exit : + ! + ! real(kind(1.d0)), pointer, optional :: b(:) + ! on entry: fresh variable. + ! on exit : this will contain the local right hand side. + ! + ! integer, optional :: inroot + ! on entry: specifies processor holding a_glob. default: 0 + ! on exit : unchanged. + ! + use f90sparse + implicit none ! parameters + type(d_spmat) :: a_glob + real(kind(1.d0)), pointer :: b_glob(:) + integer :: icontxt + type(d_spmat) :: a + real(kind(1.d0)), pointer :: b(:) + type (desc_type) :: desc_a + integer, intent(out) :: info + integer, optional :: inroot + character(len=5), optional :: fmt + interface + ! .....user passed subroutine..... + subroutine parts(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine parts + end interface ! local variables + integer :: nprow, npcol, myprow, mypcol + integer :: ircode, length_row, i_count, j_count,& + & k_count, blockdim, root, liwork, nrow, ncol, nnzero, nrhs,& + & i,j,k, ll, isize, iproc, nnr, err, err_act, int_err(5) + integer, pointer :: iwork(:) + character :: afmt*5, atyp*5 + type(d_spmat) :: blck + integer, parameter :: nb=30 + real(kind(1.d0)) :: t0, t1, t2, t3, t4, t5, mpi_wtime + external :: mpi_wtime + logical, parameter :: newt=.true. + character(len=20) :: name, ch_err + + info = 0 + err = 0 + name = 'mat_distf' + call psb_erractionsave(err_act) + + ! executable statements + if (present(inroot)) then + root = inroot + else + root = 0 + end if + call blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) + if (myprow == root) then + ! extract information from a_glob + if (a_glob%fida.ne. 'CSR') then + info=135 + ch_err='CSR' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + nrow = a_glob%m + ncol = a_glob%k + if (nrow /= ncol) then + write(0,*) 'a rectangular matrix ? ',nrow,ncol + info=-1 + call psb_errpush(info,name) + goto 9999 + endif + nnzero = size(a_glob%aspk) + nrhs = 1 + ! broadcast informations to other processors + call igebs2d(icontxt, 'a', ' ', 1, 1, nrow, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, ncol, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nnzero, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nrhs, 1) + else !(myprow /= root) + ! receive informations + call igebr2d(icontxt, 'a', ' ', 1, 1, nrow, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, ncol, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nnzero, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nrhs, 1, root, 0) + end if ! allocate integer work area + liwork = max(nprow, nrow + ncol) + allocate(iwork(liwork), stat = info) + if (info /= 0) then + write(0,*) 'matdist allocation failed' + info=2025 + int_err(1)=liwork + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + if (myprow == root) then + write (*, fmt = *) 'start matdist',root, size(iwork),& + &nrow, ncol, nnzero,nrhs + endif + if (newt) then + call psb_dscall(nrow,nrow,parts,icontxt,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psb_dscall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + call f90_psdscall(nrow,nrow,parts,icontxt,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psb_psdscall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + call f90_psspall(a,desc_a,info,nnz=nnzero/nprow) + if(info/=0) then + info=4010 + ch_err='psb_psspall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsall(nrow,b,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psb_psdsall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + isize = max(3*nb,ncol) + + + blck%m = nb + blck%k = ncol + call spall(blck,nb*ncol,info) + if(info/=0) then + info=4010 + ch_err='spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + blck%fida = 'CSR' + i_count = 1 + + do while (i_count.le.nrow) + +!!$ write(0,*) myprow,'main loop in matdist',i_count,nrow +!!$ call blacs_barrier(icontxt,'all') + call parts(i_count,nrow,nprow,iwork, length_row) + + if (length_row.eq.1) then + j_count = i_count + iproc = iwork(1) + do + j_count = j_count + 1 + if (j_count-i_count >= nb) exit + if (j_count > nrow) exit + call parts(j_count,nrow,nprow,iwork, length_row) + if (length_row /= 1 ) exit + if (iwork(1) /= iproc ) exit + end do + + ! now we should insert rows i_count..j_count-1 + nnr = j_count - i_count +!!$ write(0,*) myprow,'main loop in matdist',i_count,nnr,iproc +!!$ call blacs_barrier(icontxt,'all') + if (myprow == root) then + + do j = i_count, j_count + blck%ia2(j-i_count+1) = a_glob%ia2(j) - & + & a_glob%ia2(i_count) + 1 + enddo + + k = a_glob%ia2(i_count) + do j = k, a_glob%ia2(j_count)-1 + blck%aspk(j-k+1) = a_glob%aspk(j) + blck%ia1(j-k+1) = a_glob%ia1(j) + enddo + + ll = blck%ia2(nnr+1) - 1 + blck%m = nnr + blck%k = nrow + if (iproc == myprow) then + call f90_psspins(a,i_count,1,blck,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psspins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsins(nnr,b,i_count,b_glob(i_count:j_count-1),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='psdsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + call igesd2d(icontxt,1,1,nnr,1,iproc,0) + call igesd2d(icontxt,1,1,ll,1,iproc,0) + call igesd2d(icontxt,nnr+1,1,blck%ia2,nnr+1,iproc,0) + call igesd2d(icontxt,ll,1,blck%ia1,ll,iproc,0) + call dgesd2d(icontxt,ll,1,blck%aspk,ll,iproc,0) + call dgesd2d(icontxt,nnr,1,b_glob(i_count:j_count-1),nnr,iproc,0) + call igerv2d(icontxt,1,1,ll,1,iproc,0) + endif + else if (myprow /= root) then + + if (iproc == myprow) then + call igerv2d(icontxt,1,1,nnr,1,root,0) + call igerv2d(icontxt,1,1,ll,1,root,0) +!!$ write(0,*) myprow,'rows and size ',nnr,ll,size(blck%ia2),size(blck%ia1) + call igerv2d(icontxt,nnr+1,1,blck%ia2,nnr+1,root,0) + if (ll > size(blck%ia1)) then + write(0,*) myprow,'need to reallocate ',ll + call spreall(blck,ll,info) + if(info/=0) then + info=4010 + ch_err='spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + endif + call igerv2d(icontxt,ll,1,blck%ia1,ll,root,0) + call dgerv2d(icontxt,ll,1,blck%aspk,ll,root,0) + call dgerv2d(icontxt,nnr,1,b_glob(i_count:i_count+nnr-1),nnr,root,0) + call igesd2d(icontxt,1,1,ll,1,root,0) + blck%m = nnr + blck%k = nrow + call f90_psspins(a,i_count,1,blck,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psspins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsins(nnr,b,i_count,b_glob(i_count:i_count+nnr-1),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='psdsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + endif + + i_count = j_count + + else + write(0,*) myprow,'unexpected turn' + ! here processors are counted 1..nprow + do j_count = 1, length_row + k_count = iwork(j_count) + if (myprow == root) then + blck%ia2(1) = 1 + blck%ia2(2) = 1 + do j = a_glob%ia2(i_count), a_glob%ia2(i_count+1)-1 + blck%aspk(blck%ia2(2)) = a_glob%aspk(j) + blck%ia1(blck%ia2(2)) = a_glob%ia1(j) + blck%ia2(2) =blck%ia2(2) + 1 + enddo + ll = blck%ia2(2) - 1 + if (k_count == myprow) then + blck%infoa(1) = ll + blck%infoa(2) = ll + blck%infoa(3) = 2 + blck%infoa(4) = 1 + blck%infoa(5) = 1 + blck%infoa(6) = 1 + blck%m = 1 + blck%k = nrow + + call f90_psspins(a,i_count,1,blck,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psspins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsins(1,b,i_count,b_glob(i_count:i_count),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='psdsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + call igesd2d(icontxt,1,1,ll,1,k_count,0) + call igesd2d(icontxt,ll,1,blck%ia1,ll,k_count,0) + call dgesd2d(icontxt,ll,1,blck%aspk,ll,k_count,0) + call dgesd2d(icontxt,1,1,b_glob(i_count),1,k_count,0) + call igerv2d(icontxt,1,1,ll,1,k_count,0) + endif + else if (myprow /= root) then + if (k_count == myprow) then + call igerv2d(icontxt,1,1,ll,1,root,0) + blck%ia2(1) = 1 + blck%ia2(2) = ll+1 + call igerv2d(icontxt,ll,1,blck%ia1,ll,root,0) + call dgerv2d(icontxt,ll,1,blck%aspk,ll,root,0) + call dgerv2d(icontxt,1,1,b_glob(i_count),1,root,0) + call igesd2d(icontxt,1,1,ll,1,root,0) + blck%m = 1 + blck%k = nrow + call f90_psspins(a,i_count,1,blck,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psspins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsins(1,b,i_count,b_glob(i_count:i_count),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='psdsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + endif + end do + i_count = i_count + 1 + endif + end do + + if (present(fmt)) then + afmt=fmt + else + afmt = 'CSR' + endif + if (newt) then + + call blacs_barrier(icontxt,'all') + t0 = mpi_wtime() + call psb_dscasb(desc_a,info) + t1 = mpi_wtime() + if(info/=0)then + info=4010 + ch_err='psb_dscasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call blacs_barrier(icontxt,'all') + t2 = mpi_wtime() + call psb_spasb(a,desc_a,info,dup=1,afmt=afmt) + t3 = mpi_wtime() + if(info/=0)then + info=4010 + ch_err='psb_spasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + if (myprow == root) then + write(*,*) 'descriptor assembly: ',t1-t0 + write(*,*) 'sparse matrix assembly: ',t3-t2 + end if + + + else + call f90_psspasb(a,desc_a,info,dup=1,afmt=afmt) + if(info/=0)then + info=4010 + ch_err='psspasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + + + call f90_psdsasb(b,desc_a,info) + if(info/=0)then + info=4010 + ch_err='psdsasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call spfree(blck,info) + if(info/=0)then + info=4010 + ch_err='spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(iwork) + if (myprow == root) write (*, fmt = *) 'end matdist' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + + end subroutine dmatdistf + + + subroutine dmatdistv (a_glob, a, v, icontxt, desc_a,& + & b_glob, b, info, inroot,fmt) + ! + ! an utility subroutine to distribute a matrix among processors + ! according to a user defined data distribution, using pessl + ! sparse matrix subroutines. + ! + ! type(d_spmat) :: a_glob + ! on entry: this contains the global sparse matrix as follows: + ! a%fida =='csr' + ! a%aspk for coefficient values + ! a%ia1 for column indices + ! a%ia2 for row pointers + ! a%m for number of global matrix rows + ! a%k for number of global matrix columns + ! on exit : undefined, with unassociated pointers. + ! + ! type(d_spmat) :: a + ! on entry: fresh variable. + ! on exit : this will contain the local sparse matrix. + ! + ! interface parts + ! ! .....user passed subroutine..... + ! subroutine parts(global_indx,n,np,pv,nv) + ! implicit none + ! integer, intent(in) :: global_indx, n, np + ! integer, intent(out) :: nv + ! integer, intent(out) :: pv(*) + ! + ! end subroutine parts + ! end interface + ! on entry: subroutine providing user defined data distribution. + ! for each global_indx the subroutine should return + ! the list pv of all processes owning the row with + ! that index; the list will contain nv entries. + ! usually nv=1; if nv >1 then we have an overlap in the data + ! distribution. + ! + ! integer :: icontxt + ! on entry: blacs context. + ! on exit : unchanged. + ! + ! type (desc_type) :: desc_a + ! on entry: fresh variable. + ! on exit : the updated array descriptor + ! + ! real(kind(1.d0)), pointer, optional :: b_glob(:) + ! on entry: this contains right hand side. + ! on exit : + ! + ! real(kind(1.d0)), pointer, optional :: b(:) + ! on entry: fresh variable. + ! on exit : this will contain the local right hand side. + ! + ! integer, optional :: inroot + ! on entry: specifies processor holding a_glob. default: 0 + ! on exit : unchanged. + ! + use f90sparse + implicit none ! parameters + type(d_spmat) :: a_glob + real(kind(1.d0)), pointer :: b_glob(:) + integer :: icontxt, v(:) + type(d_spmat) :: a + real(kind(1.d0)), pointer :: b(:) + type (desc_type) :: desc_a + integer, intent(out) :: info + integer, optional :: inroot + character(len=5), optional :: fmt + + integer :: nprow, npcol, myprow, mypcol + integer :: ircode, length_row, i_count, j_count,& + & k_count, blockdim, root, liwork, nrow, ncol, nnzero, nrhs,& + & i,j,k, ll, isize, iproc, nnr, err, err_act, int_err(5) + integer, pointer :: iwork(:) + character :: afmt*5, atyp*5 + type(d_spmat) :: blck + integer, parameter :: nb=30 + logical, parameter :: newt=.true. + real(kind(1.d0)) :: t0, t1, t2, t3, t4, t5, mpi_wtime + external :: mpi_wtime + character(len=20) :: name, ch_err + + info = 0 + err = 0 + name = 'mat_distv' + call psb_erractionsave(err_act) + + ! executable statements + if (present(inroot)) then + root = inroot + else + root = 0 + end if + + call blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) + if (myprow == root) then + ! extract information from a_glob + if (a_glob%fida.ne. 'CSR') then + info=135 + ch_err='CSR' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + endif + + nrow = a_glob%m + ncol = a_glob%k + if (nrow /= ncol) then + write(0,*) 'a rectangular matrix ? ',nrow,ncol + info=-1 + call psb_errpush(info,name) + goto 9999 + endif + + nnzero = size(a_glob%aspk) + nrhs = 1 + ! broadcast informations to other processors + call igebs2d(icontxt, 'a', ' ', 1, 1, nrow, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, ncol, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nnzero, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nrhs, 1) + else !(myprow /= root) + ! receive informations + call igebr2d(icontxt, 'a', ' ', 1, 1, nrow, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, ncol, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nnzero, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nrhs, 1, root, 0) + end if ! allocate integer work area + liwork = max(nprow, nrow + ncol) + allocate(iwork(liwork), stat = info) + if (info /= 0) then + write(0,*) 'matdist allocation failed' + info=2025 + int_err(1)=liwork + call psb_errpush(info,name,i_err=int_err) + goto 9999 + endif + + if (myprow == root) then + write (*, fmt = *) 'start matdist v',root, size(iwork),& + &nrow, ncol, nnzero,nrhs + endif + + call psb_dscall(nrow,v,icontxt,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psb_dscall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call f90_psspall(a,desc_a,info,nnz=((nnzero+nprow-1)/nprow)) + if(info/=0) then + info=4010 + ch_err='psb_psspall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsall(nrow,b,desc_a,info) + if(info/=0) then + info=4010 + ch_err='psb_psdsall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + isize = max(3*nb,ncol) + + + blck%m = nb + blck%k = ncol + call spall(blck,nb*ncol,info) + if(info/=0) then + info=4010 + ch_err='spall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + blck%fida = 'COO' + + i_count = 1 + + do while (i_count <= nrow) + + j_count = i_count + iproc = v(i_count) + + do + j_count = j_count + 1 + if (j_count-i_count >= nb) exit + if (j_count > nrow) exit + if (v(j_count) /= iproc ) exit + end do + + ! now we should insert rows i_count..j_count-1 + nnr = j_count - i_count + + if (myprow == root) then + ll = a_glob%ia2(j_count)-a_glob%ia2(i_count) + if (ll > size(blck%aspk)) then + call spreall(blck,ll,info) + if(info/=0) then + info=4010 + ch_err='spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + endif + k = a_glob%ia2(i_count) + do i= i_count, j_count-1 + do j = a_glob%ia2(i),a_glob%ia2(i+1)-1 + blck%ia1(j-k+1) = i + blck%ia2(j-k+1) = a_glob%ia1(j) + blck%aspk(j-k+1) = a_glob%aspk(j) + end do + enddo + + blck%m = nnr + blck%k = nrow + blck%infoa(nnz_) = ll + if (iproc == myprow) then + call psb_spins(ll,blck%ia1,blck%ia2,blck%aspk,a,desc_a,info) + if(info/=0) then + info=4010 + ch_err='spins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call f90_psdsins(nnr,b,i_count,b_glob(i_count:j_count-1),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='dsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + else + call igesd2d(icontxt,1,1,nnr,1,iproc,0) + call igesd2d(icontxt,1,1,ll,1,iproc,0) + call igesd2d(icontxt,ll,1,blck%ia1,ll,iproc,0) + call igesd2d(icontxt,ll,1,blck%ia2,ll,iproc,0) + call dgesd2d(icontxt,ll,1,blck%aspk,ll,iproc,0) + call dgesd2d(icontxt,nnr,1,b_glob(i_count:j_count-1),nnr,iproc,0) + call igerv2d(icontxt,1,1,ll,1,iproc,0) + endif + else if (myprow /= root) then + + if (iproc == myprow) then + call igerv2d(icontxt,1,1,nnr,1,root,0) + call igerv2d(icontxt,1,1,ll,1,root,0) + if (ll > size(blck%aspk)) then + write(0,*) myprow,'need to reallocate ',ll + call spreall(blck,ll,info) + if(info/=0) then + info=4010 + ch_err='spreall' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + call igerv2d(icontxt,ll,1,blck%ia1,ll,root,0) + call igerv2d(icontxt,ll,1,blck%ia2,ll,root,0) + call dgerv2d(icontxt,ll,1,blck%aspk,ll,root,0) + call dgerv2d(icontxt,nnr,1,b_glob(i_count:i_count+nnr-1),nnr,root,0) + call igesd2d(icontxt,1,1,ll,1,root,0) + blck%m = nnr + blck%k = nrow + blck%infoa(nnz_) = ll + call psb_spins(ll,blck%ia1,blck%ia2,blck%aspk,a,desc_a,info) + if(info/=0) then + info=4010 + ch_err='spins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call f90_psdsins(nnr,b,i_count,b_glob(i_count:i_count+nnr-1),& + &desc_a,info) + if(info/=0) then + info=4010 + ch_err='psdsins' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + endif + endif + i_count = j_count + + end do + + ! default storage format for sparse matrix; we do not + ! expect duplicated entries. + + if (present(fmt)) then + afmt=fmt + else + afmt = 'CSR' + endif + call blacs_barrier(icontxt,'all') + t0 = mpi_wtime() + call psb_dscasb(desc_a,info) + t1 = mpi_wtime() + if(info/=0)then + info=4010 + ch_err='psb_dscasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call blacs_barrier(icontxt,'all') + t2 = mpi_wtime() + call psb_spasb(a,desc_a,info,dup=1,afmt=afmt) + t3 = mpi_wtime() + if(info/=0)then + info=4010 + ch_err='psb_spasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (myprow == root) then + write(*,*) 'descriptor assembly: ',t1-t0 + write(*,*) 'sparse matrix assembly: ',t3-t2 + end if + + if(info/=0)then + info=4010 + ch_err='psdsasb' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call spfree(blck,info) + if(info/=0)then + info=4010 + ch_err='spfree' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(iwork) + if (myprow == root) write (*, fmt = *) 'end matdist v' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + + end subroutine dmatdistv + + + subroutine zmatdistf (a_glob, a, parts, icontxt, desc_a,& + & b_glob, b, info,inroot,fmt) + ! + ! an utility subroutine to distribute a matrix among processors + ! according to a user defined data distribution, using pessl + ! sparse matrix subroutines. + ! + ! type(d_spmat) :: a_glob + ! on entry: this contains the global sparse matrix as follows: + ! a%fida =='csr' + ! a%aspk for coefficient values + ! a%ia1 for column indices + ! a%ia2 for row pointers + ! a%m for number of global matrix rows + ! a%k for number of global matrix columns + ! on exit : undefined, with unassociated pointers. + ! + ! type(d_spmat) :: a + ! on entry: fresh variable. + ! on exit : this will contain the local sparse matrix. + ! + ! interface parts + ! ! .....user passed subroutine..... + ! subroutine parts(global_indx,n,np,pv,nv) + ! implicit none + ! integer, intent(in) :: global_indx, n, np + ! integer, intent(out) :: nv + ! integer, intent(out) :: pv(*) + ! + ! end subroutine parts + ! end interface + ! on entry: subroutine providing user defined data distribution. + ! for each global_indx the subroutine should return + ! the list pv of all processes owning the row with + ! that index; the list will contain nv entries. + ! usually nv=1; if nv >1 then we have an overlap in the data + ! distribution. + ! + ! integer :: icontxt + ! on entry: blacs context. + ! on exit : unchanged. + ! + ! type (desc_type) :: desc_a + ! on entry: fresh variable. + ! on exit : the updated array descriptor + ! + ! real(kind(1.d0)), pointer, optional :: b_glob(:) + ! on entry: this contains right hand side. + ! on exit : + ! + ! real(kind(1.d0)), pointer, optional :: b(:) + ! on entry: fresh variable. + ! on exit : this will contain the local right hand side. + ! + ! integer, optional :: inroot + ! on entry: specifies processor holding a_glob. default: 0 + ! on exit : unchanged. + ! + use typesp + use typedesc + use f90tools + implicit none ! parameters + type(z_spmat) :: a_glob + complex(kind(1.d0)), pointer :: b_glob(:) + integer :: icontxt + type(z_spmat) :: a + complex(kind(1.d0)), pointer :: b(:) + type (desc_type) :: desc_a + integer, intent(out) :: info + integer, optional :: inroot + character(len=5), optional :: fmt + interface + ! .....user passed subroutine..... + subroutine parts(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine parts + end interface ! local variables + integer :: nprow, npcol, myprow, mypcol + integer :: ircode, length_row, i_count, j_count,& + & k_count, blockdim, root, liwork, nrow, ncol, nnzero, nrhs,& + & i,j,k, ll, isize, iproc, nnr, err, err_act, int_err(5) + integer, pointer :: iwork(:) + character :: afmt*5, atyp*5 + type(z_spmat) :: blck + integer, parameter :: nb = 30 + character(len=20) :: name, ch_err + + info = 0 + err = 0 + name = 'mat_distf' + call psb_erractionsave(err_act) + + ! executable statements + if (present(inroot)) then + root = inroot + else + root = 0 + end if + + call blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) + if (myprow == root) then + ! extract information from a_glob + if (a_glob%fida.ne. 'CSR') then + write(0,*) 'unsupported input matrix format' + call blacs_abort(icontxt,-1) + endif + nrow = a_glob%m + ncol = a_glob%k + if (nrow /= ncol) then + write(0,*) 'a rectangular matrix ? ',nrow,ncol + call blacs_abort(icontxt,-1) + endif + nnzero = size(a_glob%aspk) + nrhs = 1 + ! broadcast informations to other processors + call igebs2d(icontxt, 'a', ' ', 1, 1, nrow, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, ncol, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nnzero, 1) + call igebs2d(icontxt, 'a', ' ', 1, 1, nrhs, 1) + else !(myprow /= root) + ! receive informations + call igebr2d(icontxt, 'a', ' ', 1, 1, nrow, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, ncol, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nnzero, 1, root, 0) + call igebr2d(icontxt, 'a', ' ', 1, 1, nrhs, 1, root, 0) + end if ! allocate integer work area + liwork = max(nprow, nrow + ncol) + allocate(iwork(liwork), stat = ircode) + if (ircode /= 0) then + write(0,*) 'matdist allocation failed' + return + endif + if (myprow == root) then + write (*, fmt = *) 'start matdist',root, size(iwork) + endif + call f90_psdscall(nrow,nrow,parts,icontxt,desc_a,info) + call f90_psspall(a,desc_a,info,nnz=nnzero/nprow) + call f90_psdsall(nrow,b,desc_a,info) + isize = max(3*nb,ncol) + + + allocate(blck%aspk(nnzero),blck%ia1(nnzero),blck%ia2(nnzero),stat=ircode) + if (ircode /= 0) then + write(0,*) 'error on allocating blck' + call blacs_abort(icontxt,-1) + stop + endif + + blck%m = 1 + blck%k = ncol + blck%fida = 'csr' + i_count = 1 + + do while (i_count.le.nrow) + call parts(i_count,nrow,nprow,iwork, length_row) + + if (length_row.eq.1) then + j_count = i_count + 1 + iproc = iwork(1) + call parts(j_count,nrow,nprow,iwork, length_row) + + do while ((j_count.le.nrow).and.(j_count-i_count.lt.nb)& + &.and.(length_row.eq.1).and.(iwork(1).eq.iproc)) + j_count = j_count + 1 + if (j_count.le.nrow) & + & call parts(j_count,nrow,nprow,iwork, length_row) + end do + + + ! now we should insert rows i_count..j_count-1 + nnr = j_count - i_count + + if (myprow == root) then + do j = i_count, j_count + blck%ia2(j-i_count+1) = a_glob%ia2(j) - & + & a_glob%ia2(i_count) + 1 + enddo + + k = a_glob%ia2(i_count) + do j = k, a_glob%ia2(j_count)-1 + blck%aspk(j-k+1) = a_glob%aspk(j) + blck%ia1(j-k+1) = a_glob%ia1(j) + enddo + + ll = blck%ia2(nnr+1) - 1 + blck%m = nnr + blck%k = nrow + if (iproc == myprow) then + call f90_psspins(a,i_count,1,blck,desc_a,info) + call f90_psdsins(nnr,b,i_count,b_glob(i_count:j_count-1),& + &desc_a,info) + else + call igesd2d(icontxt,1,1,nnr,1,iproc,0) + call igesd2d(icontxt,1,1,ll,1,iproc,0) + call igesd2d(icontxt,nnr+1,1,blck%ia2,nnr+1,iproc,0) + call igesd2d(icontxt,ll,1,blck%ia1,ll,iproc,0) + call zgesd2d(icontxt,ll,1,blck%aspk,ll,iproc,0) + call zgesd2d(icontxt,nnr,1,b_glob(i_count:j_count-1),nnr,iproc,0) + call igerv2d(icontxt,1,1,ll,1,iproc,0) + endif + + else if (myprow /= root) then + + if (iproc == myprow) then + call igerv2d(icontxt,1,1,nnr,1,root,0) + call igerv2d(icontxt,1,1,ll,1,root,0) + call igerv2d(icontxt,nnr+1,1,blck%ia2,nnr+1,root,0) + call igerv2d(icontxt,ll,1,blck%ia1,ll,root,0) + call zgerv2d(icontxt,ll,1,blck%aspk,ll,root,0) + call zgerv2d(icontxt,nnr,1,b_glob(i_count:i_count+nnr-1),nnr,root,0) + call igesd2d(icontxt,1,1,ll,1,root,0) + blck%m = nnr + blck%k = nrow + call f90_psspins(a,i_count,1,blck,desc_a,info) + call f90_psdsins(nnr,b,i_count,b_glob(i_count:i_count+nnr-1),& + &desc_a,info) + endif + endif + + i_count = j_count + else + ! here processors are counted 1..nprow + do j_count = 1, length_row + k_count = iwork(j_count) + if (myprow == root) then + blck%ia2(1) = 1 + blck%ia2(2) = 1 + do j = a_glob%ia2(i_count), a_glob%ia2(i_count+1)-1 + blck%aspk(blck%ia2(2)) = a_glob%aspk(j) + blck%ia1(blck%ia2(2)) = a_glob%ia1(j) + blck%ia2(2) =blck%ia2(2) + 1 + enddo + ll = blck%ia2(2) - 1 + if (k_count == myprow) then + blck%infoa(1) = ll + blck%infoa(2) = ll + blck%infoa(3) = 2 + blck%infoa(4) = 1 + blck%infoa(5) = 1 + blck%infoa(6) = 1 + blck%m = 1 + blck%k = nrow + + call f90_psspins(a,i_count,1,blck,desc_a,info) + call f90_psdsins(1,b,i_count,b_glob(i_count:i_count),& + &desc_a,info) + else + call igesd2d(icontxt,1,1,ll,1,k_count,0) + call igesd2d(icontxt,ll,1,blck%ia1,ll,k_count,0) + call zgesd2d(icontxt,ll,1,blck%aspk,ll,k_count,0) + call zgesd2d(icontxt,1,1,b_glob(i_count),1,k_count,0) + call igerv2d(icontxt,1,1,ll,1,k_count,0) + endif + else if (myprow /= root) then + if (k_count == myprow) then + call igerv2d(icontxt,1,1,ll,1,root,0) + blck%ia2(1) = 1 + blck%ia2(2) = ll+1 + call igerv2d(icontxt,ll,1,blck%ia1,ll,root,0) + call zgerv2d(icontxt,ll,1,blck%aspk,ll,root,0) + call zgerv2d(icontxt,1,1,b_glob(i_count),1,root,0) + call igesd2d(icontxt,1,1,ll,1,root,0) + blck%m = 1 + blck%k = nrow + call f90_psspins(a,i_count,1,blck,desc_a,info) + call f90_psdsins(1,b,i_count,b_glob(i_count:i_count),& + &desc_a,info) + endif + endif + end do + i_count = i_count + 1 + endif + end do + ! default storage format for sparse matrix; we do not + ! expect duplicated entries. + + if (present(fmt)) then + afmt=fmt + else + afmt = 'csr' + endif + call f90_psspasb(a,desc_a,info,dup=1) + call f90_psdsasb(b,desc_a,info) + call spfree(blck,info) + deallocate(iwork) + if (myprow == root) write (*, fmt = *) 'end matdist' + return + end subroutine zmatdistf + +end module mat_dist diff --git a/test/Fileread/mmio.f90 b/test/Fileread/mmio.f90 new file mode 100644 index 00000000..2d2e8fd4 --- /dev/null +++ b/test/Fileread/mmio.f90 @@ -0,0 +1,548 @@ +module mmio + use typesp + public mm_mat_read, mm_mat_write + interface mm_mat_read + module procedure dmm_mat_read, zmm_mat_read + end interface + interface mm_mat_write + module procedure dmm_mat_write + end interface + private desym,zdesym + +contains + + subroutine dmm_mat_read(a, iret, iunit, filename) + use typesp + implicit none + type(d_spmat), intent(out) :: a + integer, intent(out) :: iret + integer, optional, intent(in) :: iunit + character(len=*), optional, intent(in) :: filename + character :: mmheader*15, fmt*15, object*10, type*10, sym*15 + character(1024) :: line + integer :: indcrd, ptrcrd, totcrd,& + & valcrd, rhscrd, nrow, ncol, nnzero, neltvl, nrhs, nrhsix + real(kind(1.0d0)), pointer :: as_loc(:), dwork(:) + integer, pointer :: ia1_loc(:), ia2_loc(:), iwork(:), tmp(:), aux(:) + integer :: ircode, i,iel,ptr,nzr,infile,& + & j, liwork, ldwork, root, nprow, npcol, myprow, mypcol + logical, parameter :: debug=.false. + + iret = 0 + + if (present(filename)) then + if (filename=='-') then + infile=5 + else + if (present(iunit)) then + infile=iunit + else + infile=99 + endif + open(infile,file=filename, status='OLD', err=901, action='READ') + endif + else + if (present(iunit)) then + infile=iunit + else + infile=5 + endif + endif + + read(infile,fmt=*,end=902) mmheader, object, fmt, type, sym + call lowerc(object,1,10) + call lowerc(fmt,1,15) + + if ( (object .ne. 'matrix').or.(fmt.ne.'coordinate')) then + write(0,*) 'READ_MATRIX: input file type not yet supported' + iret=909 + return + end if + if (debug) write(*,*) mmheader,':', object, ':',fmt,':', type,':', sym + + do + read(infile,fmt='(a)') line + if (line(1:1) /= '%') exit + end do + if (debug) write(*,*) 'Line on input : "',line,'"' + read(line,fmt=*) nrow,ncol,nnzero + if (debug) write(*,*) 'Out: ',nrow,ncol,nnzero + a%m = nrow + a%k = ncol + a%fida = 'CSR' + a%descra='G' + call lowerc(type,1,10) + call lowerc(sym,1,15) + + if ((type == 'real').and.(sym == 'general')) then + allocate(a%aspk(nnzero), a%ia1(nnzero), a%ia2(nrow+1),& + & a%pl(nrow),a%pr(nrow), tmp(nnzero+1), aux(nnzero+2),stat = ircode) + if (ircode /= 0) goto 993 + do i=1,nnzero + read(infile,fmt=*,end=902) tmp(i),a%ia1(i),a%aspk(i) + end do + + call mrgsrt(nnzero,tmp,aux,ircode) + if (ircode.eq.0) call reordvn(nnzero,a%aspk,tmp,a%ia1,aux) + ! .... Order with key a%ia1 (COLUMN INDEX) ... + i = 1 + j = i + ! .... order with key tmp (row index) ... + do + if (i > nnzero) exit + do + if (j > nnzero) exit + if (tmp(j) /= tmp(i)) exit + j = j+1 + ! if (j.eq.(nnzero+1)) exit + enddo + iel = j - i + call mrgsrt(iel,a%ia1(i),aux,ircode) + if (ircode == 0) call reordvn(iel,a%aspk(i),tmp(i),& + & a%ia1(i), aux) + i = j + enddo + + ! convert to csr format + iel = 1 + a%ia2(1) = 1 + do i = a%ia2(1), nrow + + do + if (tmp(iel) /= i) exit + iel = iel + 1 + if (iel > nnzero) exit + enddo + a%ia2(i+1) = iel + enddo + deallocate(aux,tmp) + + else if ((type == 'real').and.(sym == 'symmetric')) then + ! we are generally working with non-symmetric matrices, so + ! we de-symmetrize what we are about to read + + allocate(a%aspk(2*nnzero),a%ia1(2*nnzero),& + & a%ia2(2*nnzero),as_loc(2*nnzero),& + & ia1_loc(2*nnzero),ia2_loc(2*nnzero),& + & a%pl(nrow),a%pr(nrow), stat = ircode) + + if (ircode /= 0) goto 993 + + do i=1,nnzero + read(infile,fmt=*,end=902) a%ia1(i),a%ia2(i),a%aspk(i) + end do + + liwork = 2*nnzero+2 + allocate(iwork(liwork), stat = ircode) + if (ircode /= 0) goto 993 + ! After this call NNZERO contains the actual value for + ! desymetrized matrix + call desym(nrow, a%aspk, a%ia2, a%ia1, as_loc, ia2_loc,& + & ia1_loc, iwork, nnzero, nzr) + + call spreall(a,nzr,ircode) + if (ircode /= 0) goto 993 + allocate(tmp(nzr),stat=ircode) + if (ircode /= 0) goto 993 + if (.false.) then + a%aspk(1:nzr) = as_loc(1:nzr) + a%ia1(1:nzr) = ia2_loc(1:nzr) + tmp(1:nzr) = ia1_loc(1:nzr) + else + write(0,*) 'After DESYM: ',nzr,ia2_loc(1:10) + do i=1,nzr + a%aspk(i) = as_loc(i) + a%ia1(i) = ia2_loc(i) + tmp(i) = ia1_loc(i) + end do + endif + + iel = 1 + a%ia2(1) = 1 + do i = 1, nrow + do + if (tmp(iel) /= i) exit + iel = iel + 1 + if (iel > nzr) exit + enddo + a%ia2(i+1) = iel + enddo + + deallocate(as_loc, ia1_loc, ia2_loc,tmp,iwork) + else + write(0,*) 'read_matrix: matrix type not yet supported' + iret=904 + end if + if (infile/=5) close(infile) + return + + ! open failed +901 iret=901 + write(0,*) 'read_matrix: could not open file ',filename,' for input' + return +902 iret=902 + write(0,*) 'READ_MATRIX: Unexpected end of file ' + return +993 iret=993 + write(0,*) 'READ_MATRIX: Memory allocation failure' + return + end subroutine dmm_mat_read + + + subroutine zmm_mat_read(a, iret, iunit, filename) + use typesp + implicit none + type(z_spmat), intent(out) :: a + integer, intent(out) :: iret + integer, optional, intent(in) :: iunit + character(len=*), optional, intent(in) :: filename + character :: mmheader*15, fmt*15, object*10, type*10, sym*15, line*1024 + integer :: indcrd, ptrcrd, totcrd,& + & valcrd, rhscrd, nrow, ncol, nnzero, neltvl, nrhs, nrhsix + complex(kind(1.0d0)), pointer :: as_loc(:), dwork(:) + integer, pointer :: ia1_loc(:), ia2_loc(:), iwork(:), tmp(:), aux(:) + integer :: ircode, i,iel,ptr,nzr,infile,& + & j, liwork, ldwork, root, nprow, npcol, myprow, mypcol + + iret = 0 + + if (present(filename)) then + if (filename=='-') then + infile=5 + else + if (present(iunit)) then + infile=iunit + else + infile=99 + endif + open(infile,file=filename, status='OLD', err=901, action='READ') + endif + else + if (present(iunit)) then + infile=iunit + else + infile=5 + endif + endif + + read(infile,fmt=*,end=902) mmheader, object, fmt, type, sym + call lowerc(object,1,10) + call lowerc(fmt,1,15) + + if ( (object .ne. 'matrix').or.(fmt.ne.'coordinate')) then + write(0,*) 'READ_MATRIX: input file type not yet supported' + iret=909 + return + end if + + do + read(infile,fmt='(a)') line + if (line(1:1) /= '%') exit + end do + read(line,fmt=*) nrow,ncol,nnzero + + a%m = nrow + a%k = ncol + a%fida = 'CSR' + call lowerc(type,1,10) + call lowerc(sym,1,15) + + if ((type == 'complex').and.(sym == 'general')) then + + + allocate(a%aspk(nnzero), a%ia1(nnzero), a%ia2(nrow+1),& + & a%pl(nrow),a%pr(nrow), tmp(nnzero+1), aux(nnzero+2),stat = ircode) + if (ircode /= 0) goto 993 + do i=1,nnzero + read(infile,fmt=*,end=902) tmp(i),a%ia1(i),a%aspk(i) + end do + + call mrgsrt(nnzero,tmp,aux,ircode) + if (ircode.eq.0) call zreordvn(nnzero,a%aspk,tmp,a%ia1,aux) + ! .... Order with key a%ia1 (COLUMN INDEX) ... + i = 1 + j = i + ! .... order with key tmp (row index) ... + do + if (i > nnzero) exit + do + if (j > nnzero) exit + if (tmp(j) /= tmp(i)) exit + j = j+1 + ! if (j.eq.(nnzero+1)) exit + enddo + iel = j - i + call mrgsrt(iel,a%ia1(i),aux,ircode) + if (ircode == 0) call zreordvn(iel,a%aspk(i),tmp(i),& + & a%ia1(i), aux) + i = j + enddo + + ! convert to csr format + iel = 1 + a%ia2(1) = 1 + do i = a%ia2(1), nrow + + do + if (iel > nnzero) exit + if (tmp(iel) /= i) exit + iel = iel + 1 + enddo + a%ia2(i+1) = iel + enddo + deallocate(aux,tmp) + + else if ((type == 'complex').and.(sym == 'symmetric')) then + ! we are generally working with non-symmetric matrices, so + ! we de-symmetrize what we are about to read + allocate(a%aspk(2*nnzero),a%ia1(2*nnzero),& + & a%ia2(2*nnzero),as_loc(2*nnzero),& + & ia1_loc(2*nnzero),ia2_loc(2*nnzero),& + &a%pl(nrow),a%pr(nrow), stat = ircode) + if (ircode /= 0) goto 993 + do i=1,nnzero + read(infile,fmt=*,end=902) a%ia1(i),a%ia2(i),a%aspk(i) + end do + + liwork = 2*nnzero+2 + allocate(iwork(liwork), stat = ircode) + if (ircode /= 0) goto 993 + ! After this call NNZERO contains the actual value for + ! desymetrized matrix + call zdesym(nrow, a%aspk, a%ia2, a%ia1, as_loc, ia2_loc,& + & ia1_loc, iwork, nnzero, nzr) + + deallocate(a%aspk,a%ia1,a%ia2) + nnzero=nzr +!!$ call spreall(a,nzr,ircode) + if (ircode /= 0) goto 993 + allocate(tmp(nzr),stat=ircode) + if (ircode /= 0) goto 993 + + a%aspk(1:nzr) = as_loc(1:nzr) + a%ia1(1:nzr) = ia2_loc(1:nzr) + tmp(1:nzr) = ia1_loc(1:nzr) + + + iel = 1 + a%ia2(1) = 1 + do i = 1, nrow + do + if (tmp(iel) /= i) exit + iel = iel + 1 + if (iel > nzr) exit + enddo + a%ia2(i+1) = iel + enddo + + deallocate(as_loc, ia1_loc, ia2_loc,tmp,iwork) + else + write(0,*) 'read_matrix: matrix type not yet supported' + iret=904 + end if + if (infile/=5) close(infile) + return + + ! open failed +901 iret=901 + write(0,*) 'read_matrix: could not open file ',filename,' for input' + return +902 iret=902 + write(0,*) 'READ_MATRIX: Unexpected end of file ' + return +993 iret=993 + write(0,*) 'READ_MATRIX: Memory allocation failure' + return + end subroutine zmm_mat_read + + + + subroutine dmm_mat_write(a,mtitle,iret,eiout,filename) + use typesp + implicit none + type(d_spmat), intent(in) :: a + integer, intent(out) :: iret + character(len=*), intent(in) :: mtitle + integer, optional, intent(in) :: eiout + character(len=*), optional, intent(in) :: filename + integer :: iout + + + iret = 0 + + if (present(filename)) then + if (filename=='-') then + iout=6 + else + if (present(eiout)) then + iout = eiout + else + iout=99 + endif + open(iout,file=filename, err=901, action='WRITE') + endif + else + if (present(eiout)) then + iout = eiout + else + iout=6 + endif + endif + + call dcsprt(a%m,a%k,a%fida,a%descra,a%aspk,a%ia1,a%ia2,a%infoa,& + & mtitle,iout,iret) + + if (iout /= 6) close(iout) + + +!!$ write(outfile(9:),998) '.xrhs' +!!$ open (iout,file=outfile,status='replace',err=901) +!!$ write(iout,fmt=997) +!!$ write(iout,fmt=996) mtitle +!!$ write(iout,fmt=995) 'Number of equations ',nrow +!!$ write(iout,fmt=995) 'Number of iterations to convergence ',iter +!!$ write(iout,fmt=996) +!!$ write(iout,fmt=996) 'index comp. solution Right hand side' +!!$ write(iout,fmt=997) +!!$ do i=1, nrow +!!$ write(iout,993) i,x(i),rhs(i) +!!$993 format(i5,4(1x,e12.6)) +!!$ enddo +!!$ close(iout) +!!$ !$$$ call system('gzip -f9 '//outfile) + + return + +901 continue + iret=901 + write(0,*) 'Error while opening ',filename + return + end subroutine dmm_mat_write + + +!!$ subroutine lowerc(string,pos,len) +!!$ integer pos, len +!!$ character(len=*) string +!!$ +!!$ character(len=26), parameter :: lcase='abcdefghijklmnopqrstuvwxyz',& +!!$ & ucase='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +!!$ +!!$ do i=pos,pos+len-1 +!!$ k = index(ucase,string(i:i)) +!!$ if (k.ne.0) string(i:i) = lcase(k:k) +!!$ enddo +!!$ return +!!$ end subroutine lowerc + + subroutine desym(nrow,a,ja,ia,as,jas,ias,aux,nnzero,nzr) + implicit none + ! .. scalar arguments .. + integer :: nrow,nnzero,value,index,ptr, nzr + ! .. array arguments .. + real(kind(1.d0)) :: a(*),as(*) + integer :: ia(*),ias(*),jas(*),ja(*),aux(*) + ! .. local scalars .. + integer :: i,iaw1,iaw2,iawt,j,jpt,k,kpt,ldim,nzl,js,iret,nel,diagel + ! .. + + nel = 0 + diagel=0 + + do i=1, nnzero + as(i) = a(i) + jas(i) = ja(i) + ias(i) = ia(i) + if(ja(i) < ia(i)) then !this control avoids malfunctions in the cases + ! where the matrix is declared symmetric but all its elements are + ! explicitly stored see young1c.mtx from "Matrix Market". + ! Nominally Matrix Market only stores lower triangle. + nel = nel+1 + as(nnzero+nel) = a(i) + jas(nnzero+nel) = ia(i) + ias(nnzero+nel) = ja(i) + end if + end do + if (nel == 0) then ! Something strange is going on + write(0,*) 'Warning: DESYM did not copy anything in the upper triangle. ' + write(0,*) ' This feels wrong!!!!! ' + endif + ! .... order with key ias ... + nzr = nnzero + nel + call mrgsrt(nzr,ias,aux,iret) + if (iret == 0) call reordvn(nzr,as,ias,jas,aux) + ! .... order with key jas ... + + i = 1 + j = i + do + if (i > nzr) exit + do + if (j > nzr) exit + if (ias(j) /= ias(i)) exit + j = j+1 + enddo + nzl = j - i + call mrgsrt(nzl,jas(i),aux,iret) + if (iret.eq.0) call reordvn(nzl,as(i),ias(i),jas(i),aux) + i = j + + enddo + + return + end subroutine desym + + subroutine zdesym(nrow,a,ja,ia,as,jas,ias,aux,nnzero,nzr) + implicit none + ! .. scalar arguments .. + integer :: nrow,nnzero,value,index,ptr, nzr + ! .. array arguments .. + complex(kind(1.d0)) :: a(*),as(*) + integer :: ia(*),ias(*),jas(*),ja(*),aux(*) + ! .. local scalars .. + integer :: i,iaw1,iaw2,iawt,j,jpt,k,kpt,ldim,nzl,js,iret,nel,diagel + ! .. + + nel = 0 + diagel=0 + + do i=1, nnzero + as(i) = a(i) + jas(i) = ja(i) + ias(i) = ia(i) + if(ja(i) < ia(i)) then !this control avoids malfunctions in the cases + ! where the matrix is declared symmetric but all its elements are + ! explicitly stored see young1c.mtx from "Matrix Market". + ! Nominally Matrix Market only stores lower triangle. + nel = nel+1 + as(nnzero+nel) = a(i) + jas(nnzero+nel) = ia(i) + ias(nnzero+nel) = ja(i) + end if + end do + + ! .... order with key ias ... + nzr = nnzero + nel + call mrgsrt(nzr,ias,aux,iret) + if (iret == 0) call zreordvn(nzr,as,ias,jas,aux) + ! .... order with key jas ... + + i = 1 + j = i + do + if (i > nzr) exit + do + if (j > nzr) exit + if (ias(j) /= ias(i)) exit + j = j+1 + enddo + nzl = j - i + call mrgsrt(nzl,jas(i),aux,iret) + if (iret.eq.0) call zreordvn(nzl,as(i),ias(i),jas(i),aux) + i = j + + enddo + + return + end subroutine zdesym + +end module mmio diff --git a/test/Fileread/part_blk2.f b/test/Fileread/part_blk2.f new file mode 100644 index 00000000..6fbe6399 --- /dev/null +++ b/test/Fileread/part_blk2.f @@ -0,0 +1,56 @@ +C +C User defined function corresponding to an HPF BLOCK partition +C + SUBROUTINE PART_BLK2(IDX,N,NP,PV,NV) + + IMPLICIT NONE + + INTEGER IDX, N, NP + INTEGER NV + INTEGER PV(*) + DOUBLE PRECISION DDIFF + INTEGER IB1, IB2, IP, NB, NB1, NNB1 + + NV = 1 + NB = N/NP + NB1 = NB+1 + NNB1 = MOD(N,NP) + IF (IDX .LE. (NNB1*NB1)) THEN + PV(1) = (IDX - 1) / NB1 + ELSE + IF (NB > 0) THEN + IP = ( (IDX-NNB1*NB1) - 1)/NB + PV(1) = NNB1 + IP + ELSE + write(0,*) 'Impossible ??? ' + PV(1) = NNB1 + ENDIF + ENDIF + + RETURN + END + + + SUBROUTINE BLD_PARTBLK2(N,NP,IVG) + + INTEGER N, IVG(*),NP + INTEGER IB1, IB2, IP, NB, NB1, NNB1, I + + NB = N/NP + NB1 = NB+1 + NNB1 = MOD(N,NP) + DO I=1,N + IF (I .LE. (NNB1*NB1)) THEN + IVG(I) = (I - 1) / NB1 + ELSE + IF (NB > 0) THEN + IP = ( (I-NNB1*NB1) - 1)/NB + IVG(I) = NNB1 + IP + ELSE + write(0,*) 'Impossible ??? ' + IVG(I) = NNB1 + ENDIF + ENDIF + ENDDO + + END diff --git a/test/Fileread/part_block.f b/test/Fileread/part_block.f new file mode 100644 index 00000000..7c521a29 --- /dev/null +++ b/test/Fileread/part_block.f @@ -0,0 +1,67 @@ +C +C User defined function corresponding to an HPF BLOCK partition +C + SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV) + + IMPLICIT NONE + + INTEGER GLOBAL_INDX, N, NP + INTEGER NV + INTEGER PV(*) + INTEGER DIM_BLOCK + DOUBLE PRECISION DDIFF + INTEGER IB1, IB2, IPV + + double precision PC + PARAMETER (PC=0.0D0) + + DIM_BLOCK = (N + NP - 1)/NP + NV = 1 + PV(NV) = (GLOBAL_INDX - 1) / DIM_BLOCK + + IPV = PV(1) + IB1 = IPV * DIM_BLOCK + 1 + IB2 = (IPV+1) * DIM_BLOCK + + DDIFF = DBLE(ABS(GLOBAL_INDX-IB1))/DBLE(DIM_BLOCK) + IF (DDIFF .lt. PC/2) THEN +C +C Overlap at the beginning of a block, with the previous proc +C + IF (IPV.gt.0) THEN + NV = NV + 1 + PV(NV) = IPV - 1 + ENDIF + ENDIF + + DDIFF = DBLE(ABS(GLOBAL_INDX-IB2))/DBLE(DIM_BLOCK) + IF (DDIFF .lt. PC/2) THEN +C +C Overlap at the end of a block, with the next proc +C + IF (IPV.lt.(NP-1)) THEN + NV = NV + 1 + PV(NV) = IPV + 1 + ENDIF + ENDIF + + RETURN + END + + + + SUBROUTINE BLD_PARTBLOCK(N,NP,IVG) + + INTEGER N,NP,IVG(*) + + INTEGER DIM_BLOCK,I + + + DIM_BLOCK = (N + NP - 1)/NP + DO I=1,N + IVG(I) = (I - 1) / DIM_BLOCK + ENDDO + + END + + diff --git a/test/Fileread/partgraph.f90 b/test/Fileread/partgraph.f90 new file mode 100644 index 00000000..3c53101c --- /dev/null +++ b/test/Fileread/partgraph.f90 @@ -0,0 +1,206 @@ +! +! Purpose: +! Provide a set of subroutines to define a data distribution based on +! a graph partitioning routine. +! +! Subroutines: +! +! BUILD_GRPPART(A,NPARTS): This subroutine will be called by the root +! process to build define the data distribuition mapping. +! Input parameters: +! TYPE(D_SPMAT) :: A The input matrix. The coefficients are +! ignored; only the structure is used. +! INTEGER :: NPARTS How many parts we are requiring to the +! partition utility +! +! DISTR_GRPPART(RROOT,CROOT,ICTXT): This subroutine will be called by +! all processes to distribute the information computed by the root +! process, to be used subsequently. +! +! +! PART_GRAPH : The subroutine to be passed to PSBLAS sparse library; +! uses information prepared by the previous two subroutines. +! +MODULE PARTGRAPH + public part_graph, build_grppart, distr_grppart,getv_grppart + private + INTEGER, POINTER, SAVE :: GRAPH_VECT(:) + +CONTAINS + + SUBROUTINE PART_GRAPH(GLOBAL_INDX,N,NP,PV,NV) + + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + + IF (.NOT.ASSOCIATED(GRAPH_VECT)) THEN + WRITE(0,*) 'Fatal error in PART_GRAPH: vector GRAPH_VECT ',& + & 'not initialized' + RETURN + ENDIF + IF ((GLOBAL_INDX<1).OR.(GLOBAL_INDX > SIZE(GRAPH_VECT))) THEN + WRITE(0,*) 'Fatal error in PART_GRAPH: index GLOBAL_INDX ',& + & 'outside GRAPH_VECT bounds',global_indx,size(graph_vect) + RETURN + ENDIF + NV = 1 + PV(NV) = GRAPH_VECT(GLOBAL_INDX) + RETURN + END SUBROUTINE PART_GRAPH + + + SUBROUTINE DISTR_GRPPART(RROOT, CROOT, ICTXT) + INTEGER :: RROOT, CROOT, ICTXT + INTEGER :: N, MER, MEC, NPR, NPC + + CALL BLACS_GRIDINFO(ICTXT,NPR,NPC,MER,MEC) + + IF (.NOT.((RROOT>=0).AND.(RROOT=0).AND.(CROOT null() + end if + end subroutine getv_grppart + + + SUBROUTINE BUILD_GRPPART(N,FIDA,IA1,IA2,NPARTS) + USE TYPESP + INTEGER :: NPARTS + INTEGER :: IA1(:), IA2(:) + INTEGER :: N, I, IB, II,numflag,nedc,wgflag + CHARACTER(LEN=5) :: FIDA + INTEGER, PARAMETER :: NB=512 + REAL(KIND(1.D0)), PARAMETER :: SEED=12345.D0 + REAL(KIND(1.D0)) :: XV(NB) + integer :: iopt(10),idummy(2),jdummy(2) + interface + subroutine METIS_PartGraphRecursive(n,ixadj,iadj,ivwg,iajw,& + & wgflag,numflag,nparts,iopt,nedc,part) + integer :: n,wgflag,numflag,nparts,nedc + integer :: ixadj(*),iadj(*),ivwg(*),iajw(*),iopt(*),part(*) + end subroutine METIS_PartGraphRecursive + end interface + + +!!$ IF (ASSOCIATED(GRAPH_VECT)) THEN +!!$ DEALLOCATE(GRAPH_VECT) +!!$ ENDIF + + ALLOCATE(GRAPH_VECT(N),STAT=INFO) + + IF (INFO /= 0) THEN + WRITE(0,*) 'Fatal error in BUILD_GRPPART: memory allocation ',& + & ' failure.' + RETURN + ENDIF + IF (NPARTS.GT.1) THEN + IF (FIDA.EQ.'CSR') THEN + iopt(1) = 0 + numflag = 1 + wgflag = 0 + call METIS_PartGraphRecursive(n,ia2,ia1,idummy,jdummy,& + & wgflag,numflag,nparts,iopt,nedc,graph_vect) + write(0,*)'Edge cut from Metis ',nedc + DO I=1, N + GRAPH_VECT(I) = GRAPH_VECT(I) - 1 + ENDDO + ELSE + WRITE(0,*) 'Fatal error in BUILD_GRPPART: matrix format ',& + & ' failure. ', FIDA + RETURN + ENDIF + ELSE + DO I=1, N + GRAPH_VECT(I) = 0 + ENDDO + ENDIF + + RETURN + + END SUBROUTINE BUILD_GRPPART + + SUBROUTINE BUILD_USRPART(N,V,NPARTS) + INTEGER :: NPARTS + INTEGER :: V(:) + INTEGER :: N, I, IB, II,numflag,nedc,wgflag + CHARACTER(LEN=5) :: FIDA + + if ((n<=0) .or. (nparts<1)) then + write(0,*) 'Invalid input to BUILD_USRPART ',n,nparts + return + endif + + +!!$ IF (ASSOCIATED(GRAPH_VECT)) THEN +!!$ DEALLOCATE(GRAPH_VECT) +!!$ ENDIF + + ALLOCATE(GRAPH_VECT(N),STAT=INFO) + + IF (INFO /= 0) THEN + WRITE(0,*) 'Fatal error in BUILD_USRPART: memory allocation ',& + & ' failure.' + RETURN + ENDIF + + do i=1, n + if ((0<=v(i)).and.(v(i)AUX_B(:,1) + ELSE + write(0,*) 'Inventing an RHS ' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in TESTMM' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + DO I=1, M_PROBLEM + B_COL_GLOB(I) = REAL(I)*2.0/REAL(M_PROBLEM) + ENDDO + ENDIF + CALL DGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM) + ELSE + CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0) + WRITE(0,*) 'Receiving AUX_B' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in TESTMM' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + B_COL_GLOB =>AUX_B(:,1) + CALL DGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0) + END IF + + ! Switch over different partition types + IF (IPART.EQ.0) THEN + CALL BLACS_BARRIER(ICTXT,'A') + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,FMT=AFMT) + ELSE IF (IPART.EQ.2) THEN + IF (AMROOT) THEN +!!$ WRITE(0,*) 'Call BUILD',size(aux_a%ia1),size(aux_a%ia2),np + WRITE(0,*) 'Build type: GRAPH ',aux_a%fida,& + &aux_a%m + CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP) + ENDIF + + CALL DISTR_GRPPART(0,0,ICTXT) + + CALL MATDIST(AUX_A, A, PART_GRAPH, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,FMT=AFMT) + ELSE + WRITE(6,*) 'Partition type: BLOCK' + CALL MATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL,FMT=AFMT) + END IF + + CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A) + X_COL(:) =0.0 + CALL F90_PSDSASB(X_COL,IERRV,DESC_A) + CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A) + R_COL(:) =0.0 + CALL F90_PSDSASB(R_COL,IERRV,DESC_A) + T2 = MPI_WTIME() - T1 + + CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,& + & T1, T1, -1, -1, -1) + + IF (AMROOT) THEN + WRITE(6,*) 'Time to Read and Partition Matrix : ',T2 + END IF + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + + IF (AMROOT) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',PRE%PREC + + +!!$ do i=1,a%m +!!$ do j=a%ia2(i),a%ia2(i+1)-1 +!!$ write(0,*)'a ',i,a%ia1(j),a%aspk(j) +!!$ end do +!!$ end do +!!$ +!!$ write(0,*)'halo_index',desc_a%halo_index(:) +!!$ write(0,*)'ovrlap_index',desc_a%ovrlap_index(:) +!!$ write(0,*)'ovrlap_elem',desc_a%ovrlap_elem(:) + + T1 = MPI_WTIME() + + CALL PRECONDITIONER(A,PRE,DESC_A,INFO)!,'F') + TPREC = MPI_WTIME()-T1 + + + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1) + + WRITE(0,*) 'Preconditioner Time :',TPREC,' ',& + &prec,pre%prec + IF (INFO /= 0) THEN + WRITE(0,*) 'Error in preconditioner :',INFO + CALL BLACS_ABORT(ICTXT,-1) + STOP + END IF + + IPARM = 0 + RPARM = 0.D0 + CALL BLACS_BARRIER(ICTXT,'All') + T1 = MPI_WTIME() + IF (CMETHD.EQ.'BICGSTAB') Then + CALL F90_BICGSTAB(A,PRE,B_COL,X_COL,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') Then +!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') Then +!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then +!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ENDIF + CALL BLACS_BARRIER(ICTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + call f90_psaxpby(1.d0,b_col,0.d0,r_col,desc_A) + call f90_psspmm(-1.d0,a,x_col,1.d0,r_col,desc_a) + call f90_amax(resmx,r_col,desc_a) + where (b_col /= 0.d0) + r_col = r_col/b_col + end where + call f90_amax(resmxp,r_col,desc_a) + +!!$ ITER=IPARM(5) +!!$ ERR = RPARM(2) + if (amroot) then + write(6,*) 'methd iprec : ',pre%prec + write(6,*) 'Number of iterations : ',iter + write(6,*) 'Time to Solve Matrix : ',t2 + write(6,*) 'Time per iteration : ',t2/(iter) + write(6,*) 'Error on exit : ',err + end if + + + do nc=1, ncols + call f90_psdsall(m_problem,nc,xm,ierrv,desc_a) + call f90_psdsall(m_problem,nc,ym1,ierrv,desc_a) + call f90_psdsall(m_problem,nc,ymm,ierrv,desc_a) + ym1(:,:) = 0.d0 + ymm(:,:) = 0.d0 + do j=1,nc + xm(:,j) = j + end do + call f90_psdsasb(xm,ierrv,desc_a) + call f90_psdsasb(ym1,ierrv,desc_a) + call f90_psdsasb(ymm,ierrv,desc_a) + + tlpm1 = 1.d200 + do itry=1,ntry + call blacs_barrier(ictxt,'All') + T1 = MPI_WTIME() + do i=1, nc + call f90_psspmm(1.d0,a,xm(:,i),1.d0,ym1(:,i),desc_a) + enddo + t2 = mpi_wtime()-t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + tlpm1 = min(tlpm1,t2) +!!$ write(0,*) 'Timing for loop ',nc,itry,t2 + enddo + + tlpmm = 1.d200 + do itry=1,ntry + call blacs_barrier(ictxt,'All') + T1 = MPI_WTIME() + call f90_psspmm(1.d0,a,xm,1.d0,ymm,desc_a) + t2 = mpi_wtime()-t1 + call dgamx2d(ictxt,'a',' ',ione, ione,t2,ione,t1,t1,-1,-1,-1) + tlpmm = min(tlpmm,t2) +!!$ write(0,*) 'Timing for mm ',nc,itry,t2 + enddo + +!!$ ymm = ymm - ym1 + if (nc == 1) tnc1 = tlpm1 + if (amroot) then +!!$ write(6,*) 'Size : ',ncols,size(xm,2),size(ym1,2) +!!$ write(6,*) 'Loop : ',tlpm1 +!!$ write(6,*) 'Single call : ',tlpmm + write(6,997) nc, tlpm1, tlpmm, tlpm1/(nc*tnc1),tlpmm/(nc*tnc1) +997 format(i8,4(2x,g16.10)) + end if + +!!$ write(6,*) 'maxdiff : ',maxval(ymm) + + call f90_psdsfree(xm,desc_a) + call f90_psdsfree(ymm,desc_a) + call f90_psdsfree(ym1,desc_a) + end do + + if (.false.) then + allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr) + if (ierr.ne.0) then + write(0,*) 'Allocation error: no data collection' + else + call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0) + call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0) + if (amroot) then + write(0,*) 'Saving X on file' + write(20,*) 'Matrix: ',mtrx_file + write(20,*) 'Computed solution on ',NPROW,' processors.' + write(20,*) 'Iterations to convergence: ',iter + write(20,*) 'Error indicator (infinity norm) on exit:', & + & ' ||r||/(||A||||x||+||b||) = ',err + write(20,*) 'Max residual = ',resmx, resmxp + do i=1,m_problem + write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i) + enddo + end if + end if +998 format(i8,4(2x,g20.14)) +993 format(i6,4(1x,e12.6)) + + +!!$ ! +!!$ ! Raleygh quotients for first eigenvalue +!!$ ! +!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A) +!!$ scale = f90_psnrm2(x_col,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ do i=1, itmax +!!$ scale = f90_psnrm2(z,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(one,z,dzero,z1,desc_a) +!!$ call f90_psaxpby(scale,z,dzero,q,desc_a) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ lambda = f90_psdot(q,z,desc_A) +!!$ scale = f90_psnrm2(z,desc_A) +!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale +!!$ enddo +!!$ call f90_psaxpby(-one,z,one,z1,desc_a) +!!$ scale = f90_psnrm2(z1,desc_A) +!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale +!!$ do i=1, desc_a%matrix_data(n_row_) +!!$ scale=z(i)/q(i) +!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda) +!!$ enddo + endif + + CALL F90_PSDSFREE(B_COL, DESC_A) + CALL F90_PSDSFREE(X_COL, DESC_A) + CALL F90_PSSPFREE(A, DESC_A) + CALL F90_PSPRECFREE(PRE,info) + CALL F90_PSDSCFREE(DESC_A,info) + CALL BLACS_GRIDEXIT(ICTXT) + CALL BLACS_EXIT(0) + + +END PROGRAM TESTMM + + + + + diff --git a/test/Fileread/zdesym.f b/test/Fileread/zdesym.f new file mode 100644 index 00000000..89d26365 --- /dev/null +++ b/test/Fileread/zdesym.f @@ -0,0 +1,154 @@ +* SUBROUTINE DESYM(NROW,A,JA,IA,AS,JAS,IAS,IAW,NNZERO) * +* * +* Purpose * +* ======= * +* Utility routine to convert from symmetric storage * +* to full format (CSR mode). * +* * +* Parameter * +* ========= * +* INPUT= * +* * +* SYMBOLIC NAME: NROW * +* POSITION: Parameter No.1 * +* ATTRIBUTES: INTEGER * +* VALUES: NROW>0 * +* DESCRIPTION: On entry NROW specifies the number of rows of the * +* input sparse matrix. The number of column of the input * +* sparse matrix mest be the same. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: A * +* POSITION: Parameter No.2 * +* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (NNZERO) * +* VALUES: * +* DESCRIPTION: A specifies the values of the input sparse matrix. * +* This matrix is stored in CSR mode * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: JA * +* POSITION: Parameter No. 3 * +* ATTRIBUTES: INTEGER ARRAY(IA(NNZERO)) * +* VALUES: > 0 * +* DESCRIPTION: Column indices stored by rows refered to the input * +* sparse matrix. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: IA * +* POSITION: Parameter No. 4 * +* ATTRIBUTES: INTEGER ARRAY(NROW+1) * +* VALUES: >0; increasing. * +* DESCRIPTION: Row pointer array: it contains the starting * +* position of each row of A in array JA. * +* Unchanged on exit. * +* * +* SYMBOLIC NAME: IAW * +* POSITION: Parameter No. 7 * +* ATTRIBUTES: INTEGER ARRAY of Dimension (NROW+1) * +* VALUES: >0; * +* DESCRIPTION: Work Area. * +* * +* SYMBOLIC NAME: WORK * +* POSITION: Parameter No. 8 * +* ATTRIBUTES: REAL*8 ARRAY of Dimension (NROW+1) * +* VALUES: >0; * +* DESCRIPTION: Work Area. * +* * +* SYMBOLIC NAME: NNZERO * +* POSITION: Parameter No. 9 * +* ATTRIBUTES: INTEGER * +* VALUES: >0; * +* DESCRIPTION: On entry contains: the number of the non zero * +* entry of the input matrix. * +* Unchanged on exit. * +* OUTPUT== * +* * +* * +* SYMBOLIC NAME: AS * +* POSITION: Parameter No.5 * +* ATTRIBUTES: DOUBLE PRECISION ARRAY of Dimension (*) * +* VALUES: * +* DESCRIPTION: On exit A specifies the values of the output sparse * +* matrix. * +* This matrix correspondes to A rapresented in FULL-CSR * +* mode * +* * +* SYMBOLIC NAME: JAS * +* POSITION: Parameter No. 6 * +* ATTRIBUTES: INTEGER ARRAY(IAS(NROW+1)-1) * +* VALUES: > 0 * +* DESCRIPTION: Column indices stored by rows refered to the output * +* sparse matrix. * +* * +* SYMBOLIC NAME: IAS * +* POSITION: Parameter No. S * +* ATTRIBUTES: INTEGER ARRAY(NROW+1) * +* VALUES: >0; increasing. * +* DESCRIPTION: Row pointer array: it contains the starting * +* position of each row of AS in array JAS. * +***************************************************************************** + +C + SUBROUTINE ZDESYM(NROW,A,JA,IA,AS,JAS,IAS,AUX,WORK,NNZERO,PTR, + + NZR, VALUE) + IMPLICIT NONE +C .. Scalar Arguments .. + INTEGER NROW,NNZERO,VALUE,INDEX,PTR,NZR +C .. Array Arguments .. + COMPLEX*16 A(*),AS(*),WORK(*) + INTEGER IA(*),IAS(*),JAS(*),JA(*),AUX(*) +C .. Local Scalars .. + INTEGER I,IAW1,IAW2,IAWT,J,JPT,K,KPT,LDIM,NZL,JS, IRET, NEL,DIAGEL +C REAL*8 BUF +C .. + + NEL = 0 + DIAGEL=0 + + DO I=1, NNZERO + IF(JA(I).LE.IA(I)) THEN + NEL = NEL+1 + AS(I) = A(I) + JAS(I) = JA(I) + IAS(I) = IA(I) + IF(JA(I).NE.IA(I)) THEN !This control avoids malfunctions in the cases + ! where the matrix is declared symmetric but all + !his elements are explicitly stored + ! see young1c.mtx from "Matrix Market" + AS(NNZERO+I) = A(I) + JAS(NNZERO+I) = IA(I) + IAS(NNZERO+I) = JA(I) + ELSE + DIAGEL = DIAGEL+1 + END IF + END IF + END DO + +C .... Order with key IAS ... + CALL MRGSRT(2*NNZERO,IAS,AUX,IRET) + IF (IRET.EQ.0) CALL ZREORDVN(2*NNZERO,AS,IAS,JAS,AUX) +C .... Order with key JAS ... + + I = 1 + J = I + DO WHILE (I.LE.(2*NNZERO)) + DO WHILE ((IAS(J).EQ.IAS(I)).AND. + + (J.LE.2*NNZERO)) + J = J+1 + ENDDO + NZL = J - I + CALL MRGSRT(NZL,JAS(I),AUX,IRET) + IF (IRET.EQ.0) CALL ZREORDVN(NZL,AS(I),IAS(I),JAS(I), + + AUX) + I = J + ENDDO + NZR = NEL*2 - DIAGEL + PTR = 2*NNZERO-NZR+1 + + RETURN + + END + + + + diff --git a/test/Fileread/zf_sample.f90 b/test/Fileread/zf_sample.f90 new file mode 100644 index 00000000..9f09c3e9 --- /dev/null +++ b/test/Fileread/zf_sample.f90 @@ -0,0 +1,304 @@ +PROGRAM ZF_SAMPLE + USE TYPESP + USE TYPEDESC + USE F90TOOLS + USE F90PSBLAS + USE F90METHD + USE MAT_DIST + USE READ_MAT + USE PARTGRAPH + USE GETP + IMPLICIT NONE + + ! Input parameters + CHARACTER*20 :: CMETHD, PREC, MTRX_FILE, RHS_FILE + CHARACTER*80 :: CHARBUF + + DOUBLE PRECISION DDOT + EXTERNAL DDOT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PART_BLOCK + END INTERFACE ! Local variables + + INTEGER, PARAMETER :: IZERO=0, IONE=1 + CHARACTER, PARAMETER :: ORDER='R' + COMPLEX(KIND(1.D0)), POINTER,SAVE :: B_COL(:), X_COL(:), R_COL(:), & + & B_COL_GLOB(:), X_COL_GLOB(:), R_COL_GLOB(:), B_GLOB(:,:), & + &Z(:), Q(:),Z1(:) + INTEGER :: IARGC + Real(Kind(1.d0)), Parameter :: Dzero = 0.d0, One = 1.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, TPREC, R_AMAX, B_AMAX,bb(1,1),lambda,scale,resmx,resmxp + integer :: nrhs, nrow, nx1, nx2, n_row + External IARGC, MPI_WTIME + integer bsze,overlap + common/part/bsze,overlap + + ! Sparse Matrices + TYPE(Z_SPMAT) :: A, AUX_A, L, U +!!$ TYPE(D_PRECN) :: APRC + ! Dense Matrices + COMPLEX(KIND(1.D0)), POINTER :: AUX_B(:,:) , AUX1(:), AUX2(:), VDIAG(:),& + & AUX_G(:,:), AUX_X(:,:) + + ! Communications data structure + TYPE(desc_type) :: DESC_A + + ! BLACS parameters + INTEGER :: NPROW, NPCOL, ICTXT, IAM, NP, MYPROW, MYPCOL + logical :: amroot + + ! Solver paramters + INTEGER :: ITER, ITMAX, IERR, ITRACE, IRCODE, IPART,& + & IPREC, METHD, ISTOPC, ML + integer, pointer :: ierrv(:) + REAL(KIND(1.D0)) :: ERR, EPS + integer iparm(20) + real(kind(1.d0)) rparm(20) + + ! Other variables + INTEGER :: I,INFO,J + INTEGER :: INTERNAL, M,II,NNZERO + + ! common area + INTEGER M_PROBLEM, NPROC + + allocate(ierrv(6)) + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICTXT) + + ! Rectangular Grid, Np x 1 + + CALL BLACS_GRIDINIT(ICTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL) + amroot = (myprow==0).and.(mypcol==0) + ! + ! Get parameters + ! + CALL GET_PARMS(ICTXT,MTRX_FILE,RHS_FILE,CMETHD,PREC,& + & IPART,ISTOPC,ITMAX,ITRACE,ML,IPREC,EPS) + CALL BLACS_BARRIER(ICTXT,'A') + T1 = MPI_WTIME() + ! Read the input matrix to be processed and (possibly) the RHS + NRHS = 1 + IF (amroot) THEN + NULLIFY(AUX_B) + CALL ZREADMAT(MTRX_FILE, AUX_A, ICTXT) + M_PROBLEM = AUX_A%M + CALL IGEBS2D(ICTXT,'A',' ',1,1,M_PROBLEM,1) + + IF(RHS_FILE.NE.'NONE') THEN + ! Reading an RHS + CALL ZREAD_RHS(RHS_FILE,AUX_B,ICTXT) + END IF + + IF (ASSOCIATED(AUX_B).and.SIZE(AUX_B,1)==M_PROBLEM) THEN + ! If any RHS were present, broadcast the first one + write(0,*) 'Ok, got an RHS ',aux_b(m_problem,1) + B_COL_GLOB =>AUX_B(:,1) + ELSE + write(0,*) 'Inventing an RHS ' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in ZF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + write(0,*) 'Check on AUX_B ',size(aux_b,1),size(aux_b,2),m_problem + B_COL_GLOB => AUX_B(:,1) + + DO I=1, M_PROBLEM + B_COL_GLOB(I) = CMPLX(I*2.0/M_PROBLEM,0) + ENDDO + ENDIF + CALL ZGEBS2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM) + + ELSE + CALL IGEBR2D(ICTXT,'A',' ',1,1,M_PROBLEM,1,0,0) + WRITE(0,*) 'Receiving AUX_B' + ALLOCATE(AUX_B(M_PROBLEM,1), STAT=IRCODE) + IF (IRCODE /= 0) THEN + WRITE(0,*) 'Memory allocation failure in ZF_SAMPLE' + CALL BLACS_ABORT(ICTXT,-1) + STOP + ENDIF + write(0,*) 'Check on AUX_B ',size(aux_b,1),size(aux_b,2),m_problem + B_COL_GLOB =>AUX_B(:,1) + CALL ZGEBR2D(ICTXT,'A',' ',M_PROBLEM,1,B_COL_GLOB,M_PROBLEM,0,0) + END IF + NPROC = NPROW + + ! Switch over different partition types + IF (IPART.EQ.0) THEN + CALL BLACS_BARRIER(ICTXT,'A') + WRITE(6,*) 'Partition type: BLOCK' + CALL ZMATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + ELSE IF (IPART.EQ.2) THEN + WRITE(6,*) amroot,' Partition type: GRAPH' + IF (amroot) THEN + CALL BUILD_GRPPART(AUX_A%M,AUX_A%FIDA,AUX_A%IA1,AUX_A%IA2,NP) + ENDIF + call blacs_barrier(ictxt,'All') + CALL DISTR_GRPPART(0,0,ICTXT) + CALL ZMATDIST(AUX_A, A, PART_GRAPH, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + ELSE + WRITE(6,*) 'Partition type: BLOCK' + CALL ZMATDIST(AUX_A, A, PART_BLOCK, ICTXT, & + & DESC_A,B_COL_GLOB,B_COL) + END IF + + write(*,*) amroot,' Done matdist' + CALL F90_PSDSALL(M_PROBLEM,X_COL,IERRV,DESC_A) + X_COL(:) =0.0 + CALL F90_PSDSASB(X_COL,IERRV,DESC_A) + CALL F90_PSDSALL(M_PROBLEM,R_COL,IERRV,DESC_A) + R_COL(:) =0.0 + CALL F90_PSDSASB(R_COL,IERRV,DESC_A) + T2 = MPI_WTIME() - T1 + + CALL DGAMX2D(ICTXT, 'A', ' ', IONE, IONE, T2, IONE,& + & T1, T1, -1, -1, -1) + + IF (amroot) THEN + WRITE(6,*) 'Time to Read and Partition Matrix : ',T2 + END IF + + ! + ! Prepare the preconditioning matrix. Note the availability + ! of optional parameters + ! + + IF (amroot) WRITE(6,*) 'Preconditioner : "',PREC(1:6),'" ',iprec + T1 = MPI_WTIME() + CALL PRECONDITIONER(IPREC,A,L,U,VDIAG,DESC_A,INFO) + + TPREC = MPI_WTIME()-T1 + + + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,TPREC,IONE,T1,T1,-1,-1,-1) + + WRITE(0,*) 'Preconditioner Time : ',TPREC,' ',& + &prec,iprec + IF (INFO /= 0) THEN + WRITE(0,*) 'Error in preconditioner :',INFO + CALL BLACS_ABORT(ICTXT,-1) + STOP + END IF + IPARM = 0 + RPARM = 0.D0 + write(0,*) amroot,'Starting method' + CALL BLACS_BARRIER(ICTXT,'All') + T1 = MPI_WTIME() + IF (CMETHD.EQ.'BICGSTAB') Then + CALL F90_BICGSTAB(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') Then +!!$ CALL F90_BICG(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') Then +!!$ CALL F90_CGS(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') Then +!!$ CALL F90_BICGSTABL(A,IPREC,L,U,VDIAG,B_COL,X_COL,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ENDIF + CALL BLACS_BARRIER(ICTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + call f90_psaxpby((1.d0,0.d0),b_col,(0.d0,0.d0),r_col,desc_A) + call f90_psspmm((-1.d0,0.d0),a,x_col,(1.d0,0.d0),r_col,desc_a) + call f90_amax(resmx,r_col,desc_a) + where (b_col/= 0) + r_col = r_col/b_col + end where + call f90_amax(resmxp,r_col,desc_a) + +!!$ ITER=IPARM(5) +!!$ ERR = RPARM(2) + if (amroot) then + write(6,*) 'methd iprec istopc : ',iprec, istopc + write(6,*) 'Number of iterations : ',iter + write(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/(ITER) + WRITE(6,*) 'Error on exit : ',ERR + END IF + + allocate(x_col_glob(m_problem),r_col_glob(m_problem),stat=ierr) + if (ierr.ne.0) then + write(0,*) 'Allocation error: no data collection' + else + call f90_psdgatherm(x_col_glob,x_col,desc_a,iroot=0) + call f90_psdgatherm(r_col_glob,r_col,desc_a,iroot=0) + if (amroot) then + write(0,*) 'Saving X on file' + write(20,*) 'Matrix: ',mtrx_file + write(20,*) 'Computed solution on ',NPROW,' processors.' + write(20,*) 'Iterations to convergence: ',iter + write(20,*) 'Error indicator (infinity norm) on exit:', & + & ' ||r||/(||A||||x||+||b||) = ',err + write(20,*) 'Max residual = ',resmx, resmxp + do i=1,m_problem + write(20,998) i,x_col_glob(i),r_col_glob(i),b_col_glob(i) + enddo + end if + end if +998 format(i8,4(2x,g20.14)) +993 format(i6,4(1x,e12.6)) + + +!!$ ! +!!$ ! Raleygh quotients for first eigenvalue +!!$ ! +!!$ CALL F90_PSDSall(M_problem,Q,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z,ierrv,DESC_A) +!!$ CALL F90_PSDSall(M_problem,Z1,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Q,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z,ierrv,DESC_A) +!!$ CALL F90_PSDSasb(Z1,ierrv,DESC_A) +!!$ scale = f90_psnrm2(x_col,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(scale,x_col,dzero,q,desc_A) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ do i=1, itmax +!!$ scale = f90_psnrm2(z,desc_a) +!!$ scale = one/scale +!!$ call f90_psaxpby(one,z,dzero,z1,desc_a) +!!$ call f90_psaxpby(scale,z,dzero,q,desc_a) +!!$ call f90_psspmm(one,a,q,dzero,z,desc_a) +!!$ lambda = f90_psdot(q,z,desc_A) +!!$ scale = f90_psnrm2(z,desc_A) +!!$ if (amroot) write(0,*) 'Lambda: ',i,lambda, scale +!!$ enddo +!!$ call f90_psaxpby(-one,z,one,z1,desc_a) +!!$ scale = f90_psnrm2(z1,desc_A) +!!$ if (amroot) write(0,*) 'Final check: ',i,lambda, scale +!!$ do i=1, desc_a%matrix_data(n_row_) +!!$ scale=z(i)/q(i) +!!$ write(0,*) 'Vector check: ',i,lambda, scale,abs(scale-lambda) +!!$ enddo + + CALL F90_PSDSFREE(B_COL, DESC_A) + CALL F90_PSDSFREE(X_COL, DESC_A) + CALL F90_PSSPFREE(A, DESC_A) + IF (IPREC.GE.2) THEN + CALL F90_PSSPFREE(L, DESC_A) + CALL F90_PSSPFREE(U, DESC_A) + END IF + CALL F90_PSDSCFREE(DESC_A,info) + CALL BLACS_GRIDEXIT(ICTXT) + CALL BLACS_EXIT(0) + +END PROGRAM ZF_SAMPLE + + + + + diff --git a/test/pargen/Makefile b/test/pargen/Makefile new file mode 100644 index 00000000..2d6a0a10 --- /dev/null +++ b/test/pargen/Makefile @@ -0,0 +1,73 @@ +include ../../Make.inc +# +# Libraries used +# +LIBDIR=../../LIB/ +PSBLAS_LIB= -L$(LIBDIR) -lpsblas +SPARKER_LIB= -L$(LIBDIR) -lsparker +ZSPARKER_LIB= -L$(LIBDIR) +BLAS90LIB=-L$(LIBDIR) -lpsblas90 +METHD90LIB=-L$(LIBDIR) +TOOLS90LIB=-L$(LIBDIR) + +# +# Compilers and such +# +CCOPT= -g +INCDIRS=-I$(LIBDIR) + + +EXEDIR=./RUNS + + +LINKOPT=$(F90COPT) + +ppde90log: ppde90log.o part_block.o + $(F90LINK) $(LINKOPT) ppde90log.o part_block.o -o ppde90log\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) -llmpe -lmpe + /bin/mv ppde90log $(EXEDIR) + +ppde90: ppde90.o part_block.o + $(F90LINK) $(LINKOPT) ppde90.o part_block.o -o ppde90\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv ppde90 $(EXEDIR) +ppde90s: ppde90s.o part_block.o + $(F90LINK) $(LINKOPT) ppde90s.o part_block.o -o ppde90s\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv ppde90s $(EXEDIR) + +ppde90.o: $(MODS) + +pp2d: pp2d.o part_block.o + $(F90LINK) $(LINKOPT) pp2d.o part_block.o -o pp2d\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \ + $(METHD_LIB) $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv pp2d $(EXEDIR) + +pp2ds: pp2ds.o part_block.o + $(F90LINK) $(LINKOPT) pp2ds.o part_block.o -o pp2ds\ + $(METHD90LIB) $(TOOLS90LIB) $(BLAS90LIB) \ + $(PSBLAS_LIB) $(SPARKER_LIB) $(BLAS)\ + $(BLACS) + /bin/mv pp2ds $(EXEDIR) + +.f90.o: + $(MPF90) $(F90COPT) $(INCDIRS) -c $< + + +clean: + /bin/rm -f ppde90.o pp2d.o part_block.o $(EXEDIR)/ppde90 $(EXEDIR)/pp2d +verycleanlib: + (cd ../..; make veryclean) +lib: + (cd ../../; make lib) + + + diff --git a/test/pargen/RUNS/Makefile b/test/pargen/RUNS/Makefile new file mode 100644 index 00000000..d8fa43ac --- /dev/null +++ b/test/pargen/RUNS/Makefile @@ -0,0 +1,12 @@ +lib: + (cd ..; $(MAKE) lib) +clean: + (cd ..; $(MAKE) clean) +verycleanlib: + (cd ..; $(MAKE) verycleanlib) +ppde90: + (cd ..; $(MAKE) ppde90) +pp2d: + (cd ..; $(MAKE) pp2d) + +.PHONY: pp2d ppde90 diff --git a/test/pargen/RUNS/mach b/test/pargen/RUNS/mach new file mode 100644 index 00000000..60432829 --- /dev/null +++ b/test/pargen/RUNS/mach @@ -0,0 +1,8 @@ +localhost +localhost +localhost +localhost +localhost +localhost +localhost +localhost diff --git a/test/pargen/RUNS/ppde.inp b/test/pargen/RUNS/ppde.inp new file mode 100644 index 00000000..9145f8e2 --- /dev/null +++ b/test/pargen/RUNS/ppde.inp @@ -0,0 +1,11 @@ +7 Number of entries below this +BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL +ILU Preconditioner ILU DIAGSC NONE +SSR A Storage format CSR COO JAD +20 Domain size (acutal sistem is this**3) +1 Stopping criterion +080 MAXIT +00 ITRACE +02 ML + + diff --git a/test/pargen/part_block.f b/test/pargen/part_block.f new file mode 100644 index 00000000..64a3298f --- /dev/null +++ b/test/pargen/part_block.f @@ -0,0 +1,50 @@ +C +C User defined function corresponding to an HPF BLOCK partition +C + SUBROUTINE PART_BLOCK(GLOBAL_INDX,N,NP,PV,NV) + + IMPLICIT NONE + + INTEGER GLOBAL_INDX, N, NP + INTEGER NV + INTEGER PV(*) + INTEGER DIM_BLOCK + DOUBLE PRECISION DDIFF + INTEGER IB1, IB2, IPV + + double precision PC + PARAMETER (PC=0.0D0) + + DIM_BLOCK = (N + NP - 1)/NP + NV = 1 + PV(NV) = (GLOBAL_INDX - 1) / DIM_BLOCK + + IPV = PV(1) + IB1 = IPV * DIM_BLOCK + 1 + IB2 = (IPV+1) * DIM_BLOCK + + DDIFF = DBLE(ABS(GLOBAL_INDX-IB1))/DBLE(DIM_BLOCK) + IF (DDIFF .LT. PC/2) THEN +C +C Overlap at the beginning of a block, with the previous proc +C + IF (IPV.GT.0) THEN + NV = NV + 1 + PV(NV) = IPV - 1 + ENDIF + ENDIF + + DDIFF = DBLE(ABS(GLOBAL_INDX-IB2))/DBLE(DIM_BLOCK) + IF (DDIFF .LT. PC/2) THEN +C +C Overlap at the end of a block, with the next proc +C + IF (IPV.LT.(NP-1)) THEN + NV = NV + 1 + PV(NV) = IPV + 1 + ENDIF + ENDIF + + RETURN + END + diff --git a/test/pargen/pp2d.f90 b/test/pargen/pp2d.f90 new file mode 100644 index 00000000..207797be --- /dev/null +++ b/test/pargen/pp2d.f90 @@ -0,0 +1,640 @@ +! +! This sample program shows how to build and solve a sparse linear +! +! The program solves a linear system based on the partial differential +! equation +! +! +! +! the equation generated is: +! b1 d d (u) b2 d d (u) a1 d (u)) a2 d (u))) +! - ------ - ------ + ----- + ------ + a3 u = 0 +! dx dx dy dy dx dy +! +! +! with Dirichlet boundary conditions on the unit cube +! +! 0<=x,y<=1 +! +! The equation is discretized with finite differences and uniform stepsize; +! the resulting discrete equation is +! +! ( u(x,y)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ +! -u(x+1,y)b1-u(x,y+1)b2)*(1/h**2) +! +! Example taken from: C.T.Kelley +! Iterative Methods for Linear and Nonlinear Equations +! SIAM 1995 +! +! +! In this sample program the index space of the discretized +! computational domain is first numbered sequentially in a standard way, +! then the corresponding vector is distributed according to an HPF BLOCK +! distribution directive. +! +! Boundary conditions are set in a very simple way, by adding +! equations of the form +! +! u(x,y) = rhs(x,y) +! +Program PP2D + USE F90SPARSE + Implicit none + + interface + !.....user passed subroutine..... + subroutine part_block(glob_index,n,np,pv,nv) + INTEGER, INTENT(IN) :: GLOB_INDEX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + end subroutine part_block + end interface + ! input parameters + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET + + ! Miscellaneous + Integer, Parameter :: IZERO=0, IONE=1 + Character, PARAMETER :: ORDER='R' + INTEGER :: IARGC,CONVERT_DESCR,dim, CHECK_DESCR + REAL(KIND(1.D0)), PARAMETER :: DZERO = 0.D0, ONE = 1.D0 + REAL(KIND(1.D0)) :: MPI_WTIME, T1, T2, TPREC, TSOLVE, T3, T4 + EXTERNAL MPI_WTIME + + ! Sparse Matrix and preconditioner + TYPE(D_SPMAT) :: A, L, U, H + TYPE(D_PREC) :: PRE + ! Descriptor + TYPE(desc_type) :: DESC_A, DESC_A_OUT + ! Dense Matrices + REAL(KIND(1.d0)), POINTER :: B(:), X(:), D(:),LD(:) + INTEGER, pointer :: WORK(:) + ! BLACS parameters + INTEGER :: nprow, npcol, icontxt, iam, np, myprow, mypcol + + ! Solver parameters + INTEGER :: ITER, ITMAX,IERR,ITRACE, METHD,IPREC, ISTOPC,& + & IPARM(20), ML + REAL(KIND(1.D0)) :: ERR, EPS, RPARM(20) + + ! Other variables + INTEGER :: I,INFO + INTEGER :: INTERNAL, M,II + + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICONTXT) + + ! Rectangular Grid, P x 1 + + CALL BLACS_GRIDINIT(ICONTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + ! + ! Get parameters + ! + CALL GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + + ! + ! Allocate and fill in the coefficient matrix, RHS and initial guess + ! + + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL CREATE_MATRIX(IDIM,A,B,X,DESC_A,PART_BLOCK,ICONTXT,AFMT) + T2 = MPI_WTIME() - T1 + + DIM=SIZE(A%ASPK) + + ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),& + & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),& + & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),& + & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),& + & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),& + & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),& + & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),& + & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(1024)) + check_descr=15 +! work(5)=9 +!!$ WRITE(0,*)'CALLING VERIFY' +!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,& +!!$ & DESC_A_OUT,WORK) +!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR + + deallocate(work) + + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + IF (IAM.EQ.0) Write(6,*) 'Matrix creation Time : ',T2 + + + ! + ! Prepare the preconditioner. + ! + SELECT CASE (PREC) + CASE ('SCHW') + IPREC = 3 + CASE ('ILU') + IPREC = 2 + CASE ('DIAGSC') + IPREC = 1 + CASE ('NONE') + IPREC = 0 + CASE DEFAULT + WRITE(0,*) 'Unknown preconditioner' + CALL BLACS_ABORT(ICONTXT,-1) + END SELECT + pre%prec=iprec + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL PRECONDITIONER(A,PRE,DESC_A,IRET) +!!$ CALL PRECONDITIONER(IPREC,A,L,U,D,DESC_A,IRET) + TPREC = MPI_WTIME()-T1 + + CALL DGAMX2D(icontxt,'A',' ',IONE, IONE,TPREC,IONE,t1,t1,-1,-1,-1) + + IF (IAM.EQ.0) WRITE(6,*) 'Preconditioner Time : ',TPREC + + IF (IRET.NE.0) THEN + WRITE(0,*) 'Error on preconditioner',IRET + CALL BLACS_ABORT(ICONTXT,-1) + STOP + END IF + + ! + ! Iterative method parameters + ! + write(*,*) 'Calling Iterative method', size(b),ml + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + EPS = 1.D-9 + IF (CMETHD.EQ.'BICGSTAB') THEN + CALL F90_BICGSTAB(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') THEN +!!$ CALL F90_BICG(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') THEN +!!$ CALL F90_CGS(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') THEN +!!$ CALL F90_BICGSTABL(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ELSE + write(0,*) 'Unknown method ',cmethd + end IF + + CALL BLACS_BARRIER(ICONTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + + IF (IAM.EQ.0) THEN + WRITE(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/ITER + WRITE(6,*) 'Number of iterations : ',ITER + WRITE(6,*) 'Error on exit : ',ERR + WRITE(6,*) 'INFO on exit : ',IERR + END IF + + ! + ! Cleanup storage and exit + ! + CALL F90_PSDSFREE(B,DESC_A) + CALL F90_PSDSFREE(X,DESC_A) +!!$ CALL F90_PSDSFREE(D,DESC_A) + + CALL F90_PSSPFREE(A,DESC_A) +!!$ CALL F90_PSSPFREE(L,DESC_A) +!!$ CALL F90_PSSPFREE(U,DESC_A) + CALL F90_PSDSCFREE(DESC_A,info) + + CALL BLACS_GRIDEXIT(ICONTXT) + CALL BLACS_EXIT(0) + + STOP + +CONTAINS + ! + ! Get iteration parameters from the command line + ! + SUBROUTINE GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + integer :: icontxt + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET, ISTOPC,ITMAX,ITRACE,ML + Character*40 :: CHARBUF + INTEGER :: IARGC, NPROW, NPCOL, MYPROW, MYPCOL + EXTERNAL IARGC + INTEGER :: INTBUF(10), IP + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + IF (MYPROW==0) THEN + READ(*,*) IP + IF (IP.GE.3) THEN + READ(*,*) CMETHD + READ(*,*) PREC + READ(*,*) AFMT + + ! Convert strings in array + DO I = 1, LEN(CMETHD) + INTBUF(I) = IACHAR(CMETHD(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(PREC) + INTBUF(I) = IACHAR(PREC(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(AFMT) + INTBUF(I) = IACHAR(AFMT(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + READ(*,*) IDIM + IF (IP.GE.4) THEN + READ(*,*) ISTOPC + ELSE + ISTOPC=1 + ENDIF + IF (IP.GE.5) THEN + READ(*,*) ITMAX + ELSE + ITMAX=500 + ENDIF + IF (IP.GE.6) THEN + READ(*,*) ITRACE + ELSE + ITRACE=-1 + ENDIF + IF (IP.GE.7) THEN + READ(*,*) ML + ELSE + ML=1 + ENDIF + ! Broadcast parameters to all processors + + INTBUF(1) = IDIM + INTBUF(2) = ISTOPC + INTBUF(3) = ITMAX + INTBUF(4) = ITRACE + INTBUF(5) = ML + CALL IGEBS2D(ICONTXT,'ALL',' ',5,1,INTBUF,5) + + WRITE(6,*)'Solving matrix: ELL1' + WRITE(6,*)'on grid',IDIM,'x',IDIM,'x',IDIM + WRITE(6,*)' with BLOCK data distribution, NP=',Np,& + & ' Preconditioner=',PREC,& + & ' Iterative methd=',CMETHD + ELSE + ! Wrong number of parameter, print an error message and exit + CALL PR_USAGE(0) + CALL BLACS_ABORT(ICONTXT,-1) + STOP 1 + ENDIF + ELSE + ! Receive Parameters + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + CMETHD(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + PREC(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 5 + AFMT(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',5,1,INTBUF,5,0,0) + IDIM = INTBUF(1) + ISTOPC = INTBUF(2) + ITMAX = INTBUF(3) + ITRACE = INTBUF(4) + ML = INTBUF(5) + END IF + RETURN + + END SUBROUTINE GET_PARMS + ! + ! Print an error message + ! + SUBROUTINE PR_USAGE(IOUT) + INTEGER :: IOUT + WRITE(IOUT,*)'Incorrect parameter(s) found' + WRITE(IOUT,*)' Usage: pde90 methd prec dim & + &[istop itmax itrace]' + WRITE(IOUT,*)' Where:' + WRITE(IOUT,*)' methd: CGSTAB TFQMR CGS' + WRITE(IOUT,*)' prec : ILU DIAGSC NONE' + WRITE(IOUT,*)' dim number of points along each axis' + WRITE(IOUT,*)' the size of the resulting linear ' + WRITE(IOUT,*)' system is dim**3' + WRITE(IOUT,*)' istop Stopping criterion 1, 2 or 3 [1] ' + WRITE(IOUT,*)' itmax Maximum number of iterations [500] ' + WRITE(IOUT,*)' itrace 0 (no tracing, default) or ' + WRITE(IOUT,*)' >= 0 do tracing every ITRACE' + WRITE(IOUT,*)' iterations ' + END SUBROUTINE PR_USAGE + +! +! Subroutine to allocate and fill in the coefficient matrix and +! the RHS. +! + SUBROUTINE CREATE_MATRIX(IDIM,A,B,T,DESC_A,PARTS,ICONTXT,AFMT) + ! + ! Discretize the partial diferential equation + ! + ! b1 dd(u) b2 dd(u) a1 d(u) a2 d(u) + ! - ------ - ------ - ----- - ------ + a4 u + ! dxdx dydy dx dy + ! + ! = 0 + ! + ! boundary condition: Dirichlet + ! 0< x,y<1 + ! + ! u(x,y)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ + ! -u(x+1,y,z)b1-u(x,y+1,z)b2 + + USE TYPESP + USE TYPEDESC + USE F90TOOLS + Implicit None + INTEGER :: IDIM +!!$ external parts + integer, parameter :: nbmax=10 + Real(Kind(1.D0)),Pointer :: B(:),T(:) + Type (desc_type) :: DESC_A + Integer :: ICONTXT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV) + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV, PV(*) + END SUBROUTINE PARTS + END INTERFACE ! Local variables + Type(D_SPMAT) :: A + Real(Kind(1.d0)) :: ZT(NBMAX),GLOB_X,GLOB_Y,GLOB_Z + Integer :: M,N,NNZ,GLOB_ROW,J + Type (D_SPMAT) :: ROW_MAT + Integer :: X,Y,Z,COUNTER,IA,I,INDX_OWNER + INTEGER :: NPROW,NPCOL,MYPROW,MYPCOL + Integer :: ELEMENT + INTEGER :: INFO, NV, INV + INTEGER, ALLOCATABLE :: PRV(:) + INTEGER, pointer :: ierrv(:) + Real(Kind(1.d0)), pointer :: DWORK(:) + INTEGER,POINTER :: IWORK(:) + character :: afmt*5 + ! deltah dimension of each grid cell + ! deltat discretization time + Real(Kind(1.D0)) :: DELTAH + Real(Kind(1.d0)),Parameter :: RHS=0.d0,ONE=1.d0,ZERO=0.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, T3, TINS + Real(Kind(1.d0)) :: a1, a2, a3, a4, b1, b2, b3 + external mpi_wtime,a1, a2, a3, a4, b1, b2, b3 + integer :: nb, ir1, ir2, ipr + logical :: own + ! common area + + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + DELTAH = 1.D0/(IDIM-1) + + ! Initialize array descriptor and sparse matrix storage. Provide an + ! estimate of the number of non zeroes + CALL SETERR(2) + allocate(ierrv(6)) + + ierrv(:) = 0 + M = IDIM*IDIM + N = M + NNZ = ((N*6)/(NPROW*NPCOL)) + write(*,*) 'Size: n ',n + Call F90_PSDSCALL(N,N,PARTS,ICONTXT,IERRV,DESC_A) + write(*,*) 'Allocating A : nnz',nnz + Call F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZ) + ! Define RHS from boundary conditions; also build initial guess + write(*,*) 'Allocating B' + Call F90_PSDSALL(N,B,IERRV,DESC_A) + write(*,*) 'Allocating T' + Call F90_PSDSALL(N,T,IERRV,DESC_A) + + ! We build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. Might be extended to generate + ! a bunch of rows per call. + ! + ROW_MAT%DESCRA(1:1) = 'G' + ROW_MAT%FIDA = 'CSR' + write(*,*) 'Allocating ROW_MAT',20*nbmax + ALLOCATE(ROW_MAT%ASPK(20*nbmax),ROW_MAT%IA1(20*nbmax),& + &ROW_MAT%IA2(20*nbmax),PRV(NPROW),stat=info) + if (info.ne.0 ) then + write(*,*) 'Memory allocation error' + call blacs_abort(icontxt,-1) + endif + + TINS = 0.D0 + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + ! Loop over rows belonging to current process in a BLOCK + ! distribution. + + Z=0 + ROW_MAT%IA2(1)=1 + DO GLOB_ROW = 1, N + CALL PARTS(GLOB_ROW,N,NPROW,PRV,NV) + DO INV = 1, NV + INDX_OWNER = PRV(INV) + IF (INDX_OWNER == MYPROW) THEN + ! Local matrix pointer + ELEMENT=1 + ! Compute gridpoint Coordinates + IF (MOD(GLOB_ROW,(IDIM)).EQ.0) THEN + X = GLOB_ROW/(IDIM) + ELSE + X = GLOB_ROW/(IDIM)+1 + ENDIF + Y = GLOB_ROW-(X-1)*IDIM + ! GLOB_X, GLOB_Y, GLOB_X coordinates + GLOB_X=X*DELTAH + GLOB_Y=Y*DELTAH + GLOB_Z=Z*DELTAH + + + ! Check on boundary points + IF (X.EQ.1) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ELSE IF (Y.EQ.1) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ELSE IF (X.EQ.IDIM) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ELSE IF (Y.EQ.IDIM) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ELSE + ! Internal point: build discretization + ! + ! Term depending on (x-1,y) + ! + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-2)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y-1,z) + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y-1) + ELEMENT=ELEMENT+1 +!!$ ! Term depending on (x,y,z-1) +!!$ ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& +!!$ & -A3(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z-1) +!!$ ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z) + ROW_MAT%ASPK(ELEMENT)=2*B1(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B2(GLOB_X,GLOB_Y,GLOB_Z)& + & +A1(GLOB_X,GLOB_Y,GLOB_Z)& + & +A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 +!!$ ! Term depending on (x,y,z+1) +!!$ ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z+1) +!!$ ELEMENT=ELEMENT+1 +!!$ ! Term depending on (x,y+1,z) + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y+1) + ELEMENT=ELEMENT+1 + ! Term depending on (x+1,y,z) + ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ENDIF + ROW_MAT%M=1 + ROW_MAT%K=N + ROW_MAT%IA2(2)=ELEMENT + ! IA== GLOBAL ROW INDEX + IA=GLOB_ROW +!!$ IA=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ write(0,*) 'Inserting row ',ia,' On proc',myprow + T3 = MPI_WTIME() + CALL F90_PSSPINS(A,IA,1,ROW_MAT,IERRV,DESC_A) + if (ierrv(1).ne.0) then + write(0,*) 'On row ',ia,' IERRV:',ierrv(:) + endif + TINS = TINS + (MPI_WTIME()-T3) + ! Build RHS + IF (X==1) THEN + GLOB_Y=(Y-IDIM/2)*DELTAH + GLOB_Z=(Z-IDIM/2)*DELTAH + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2) + ELSE IF ((Y==1).OR.(Y==IDIM).OR.(Z==1).OR.(Z==IDIM)) THEN + GLOB_X=3*(X-1)*DELTAH + GLOB_Y=(Y-IDIM/2)*DELTAH + GLOB_Z=(Z-IDIM/2)*DELTAH + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X) + ELSE + ZT(1) = 0.D0 + ENDIF + CALL F90_PSDSINS(1,B,IA,ZT(1:1),IERRV,DESC_A) + ZT(1)=0.D0 + CALL F90_PSDSINS(1,T,IA,ZT(1:1),IERRV,DESC_A) + END IF + END DO + END DO + + CALL BLACS_BARRIER(ICONTXT,'ALL') + T2 = MPI_WTIME() + + WRITE(*,*) ' pspins time',TINS + WRITE(*,*) ' Insert time',(T2-T1) + + DEALLOCATE(ROW_MAT%ASPK,ROW_MAT%IA1,ROW_MAT%IA2) + + write(*,*) 'Calling SPASB' + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + CALL F90_PSSPASB(A,IERRV,DESC_A,AFMT=AFMT) + + CALL BLACS_BARRIER(ICONTXT,'ALL') + T2 = MPI_WTIME() + + WRITE(0,*) ' Assembly time',(T2-T1),' ',a%fida(1:4) + + CALL F90_PSDSASB(B,IERRV,DESC_A) + CALL F90_PSDSASB(T,IERRV,DESC_A) + IF (MYPROW.EQ.0) THEN + WRITE(0,*) ' End CREATE_MATRIX' + ENDIF + RETURN + + END SUBROUTINE CREATE_MATRIX +END PROGRAM PP2D +! +! Functions parametrizing the differential equation +! +FUNCTION A1(X,Y,Z) + REAL(KIND(1.D0)) :: A1 + REAL(KIND(1.D0)) :: X,Y,Z + A1=1.D0 +END FUNCTION A1 +FUNCTION A2(X,Y,Z) + REAL(KIND(1.D0)) :: A2 + REAL(KIND(1.D0)) :: X,Y,Z + A2=2.D1*Y +END FUNCTION A2 +FUNCTION A3(X,Y,Z) + REAL(KIND(1.D0)) :: A3 + REAL(KIND(1.D0)) :: X,Y,Z + A3=1.D0 +END FUNCTION A3 +FUNCTION A4(X,Y,Z) + REAL(KIND(1.D0)) :: A4 + REAL(KIND(1.D0)) :: X,Y,Z + A4=1.D0 +END FUNCTION A4 +FUNCTION B1(X,Y,Z) + REAL(KIND(1.D0)) :: B1 + REAL(KIND(1.D0)) :: X,Y,Z + B1=1.D0 +END FUNCTION B1 +FUNCTION B2(X,Y,Z) + REAL(KIND(1.D0)) :: B2 + REAL(KIND(1.D0)) :: X,Y,Z + B2=1.D0 +END FUNCTION B2 +FUNCTION B3(X,Y,Z) + REAL(KIND(1.D0)) :: B3 + REAL(KIND(1.D0)) :: X,Y,Z + B3=1.D0 +END FUNCTION B3 + + diff --git a/test/pargen/pp2ds.f90 b/test/pargen/pp2ds.f90 new file mode 100644 index 00000000..5ee6610b --- /dev/null +++ b/test/pargen/pp2ds.f90 @@ -0,0 +1,700 @@ +! +! This sample program shows how to build and solve a sparse linear +! +! The program solves a linear system based on the partial differential +! equation +! +! +! +! the equation generated is: +! b1 d d (u) b2 d d (u) a1 d (u)) a2 d (u))) +! - ------ - ------ + ----- + ------ + a3 u = 0 +! dx dx dy dy dx dy +! +! +! with Dirichlet boundary conditions on the unit cube +! +! 0<=x,y,z<=1 +! +! The equation is discretized with finite differences and uniform stepsize; +! the resulting discrete equation is +! +! ( u(x,y,z)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ +! -u(x+1,y)b1-u(x,y+1)b2)*(1/h**2) +! +! Example taken from: C.T.Kelley +! Iterative Methods for Linear and Nonlinear Equations +! SIAM 1995 +! +! +! In this sample program the index space of the discretized +! computational domain is first numbered sequentially in a standard way, +! then the corresponding vector is distributed according to an HPF BLOCK +! distribution directive. +! +! Boundary conditions are set in a very simple way, by adding +! equations of the form +! +! u(x,y) = rhs(x,y) +! +Program PDE90 + USE F90SPARSE + Implicit none + + interface + !.....user passed subroutine..... + subroutine part_block(glob_index,n,np,pv,nv) + INTEGER, INTENT(IN) :: GLOB_INDEX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + end subroutine part_block + end interface + ! input parameters + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET + + ! Miscellaneous + Integer, Parameter :: IZERO=0, IONE=1 + Character, PARAMETER :: ORDER='R' + INTEGER :: IARGC,CONVERT_DESCR,dim, CHECK_DESCR + REAL(KIND(1.D0)), PARAMETER :: DZERO = 0.D0, ONE = 1.D0 + REAL(KIND(1.D0)) :: MPI_WTIME, T1, T2, TPREC, TSOLVE, T3, T4 + EXTERNAL MPI_WTIME + + ! Sparse Matrix and preconditioner + TYPE(D_SPMAT) :: A, L, U, H + TYPE(D_PREC) :: PRE + ! Descriptor + TYPE(desc_type) :: DESC_A, DESC_A_OUT + ! Dense Matrices + REAL(KIND(1.d0)), POINTER :: B(:), X(:), D(:),LD(:) + INTEGER, pointer :: WORK(:) + ! BLACS parameters + INTEGER :: nprow, npcol, icontxt, iam, np, myprow, mypcol + + ! Solver parameters + INTEGER :: ITER, ITMAX,IERR,ITRACE, METHD,IPREC, ISTOPC,& + & IPARM(20), ML + REAL(KIND(1.D0)) :: ERR, EPS, RPARM(20) + + ! Other variables + INTEGER :: I,INFO + INTEGER :: INTERNAL, M,II + + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICONTXT) + + ! Rectangular Grid, P x 1 + + CALL BLACS_GRIDINIT(ICONTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + ! + ! Get parameters + ! + CALL GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + + ! + ! Allocate and fill in the coefficient matrix, RHS and initial guess + ! + + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL CREATE_MATRIX(IDIM,A,B,X,DESC_A,PART_BLOCK,ICONTXT,AFMT) + T2 = MPI_WTIME() - T1 + + DIM=SIZE(A%ASPK) + + ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),& + & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),& + & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),& + & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),& + & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),& + & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),& + & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),& + & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(1024)) + check_descr=15 +! work(5)=9 +!!$ WRITE(0,*)'CALLING VERIFY' +!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,& +!!$ & DESC_A_OUT,WORK) +!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR + + deallocate(work) + + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + IF (IAM.EQ.0) Write(6,*) 'Matrix creation Time : ',T2 + + ! + ! Prepare the preconditioner. + ! + write(0,*)'PRECONDIZIONATORE=',prec + SELECT CASE (PREC) + CASE ('SCHW6') + IPREC = 6 + CASE ('SCHW5') + IPREC = 5 + CASE ('SCHW4') + IPREC = 4 + CASE ('SCHW3') + IPREC = 3 + CASE ('ILU') + IPREC = 2 + CASE ('DIAGSC') + IPREC = 1 + CASE ('NONE') + IPREC = 0 + CASE DEFAULT + WRITE(0,*) 'Unknown preconditioner' + CALL BLACS_ABORT(ICONTXT,-1) + END SELECT + pre%prec=iprec + pre%n_ovr=ml + pre%irenum=0 + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL PRECONDITIONER(A,PRE,DESC_A,IRET) +!!$ CALL PRECONDITIONER(IPREC,A,L,U,D,DESC_A,IRET) + TPREC = MPI_WTIME()-T1 + + CALL DGAMX2D(icontxt,'A',' ',IONE, IONE,TPREC,IONE,t1,t1,-1,-1,-1) + + IF (IAM.EQ.0) WRITE(6,*) 'Preconditioner Time : ',TPREC + + IF (IRET.NE.0) THEN + WRITE(0,*) 'Error on preconditioner',IRET + CALL BLACS_ABORT(ICONTXT,-1) + STOP + END IF + + ! + ! Iterative method parameters + ! + call dcsprt90(80+myprow,a,head='% Local A') + + write(*,*) 'Calling Iterative method', size(b),ml + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + EPS = 1.D-9 + IF (CMETHD.EQ.'BICGSTAB') THEN + CALL F90_BICGSTAB(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') THEN +!!$ CALL F90_BICG(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) + ELSE IF (CMETHD.EQ.'CGS') THEN + CALL F90_CGS(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) + ELSE IF (CMETHD.EQ.'BICGSTABL') THEN + CALL F90_BICGSTABL(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ELSE + write(0,*) 'Unknown method ',cmethd + end IF + + CALL BLACS_BARRIER(ICONTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + + IF (IAM.EQ.0) THEN + WRITE(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/ITER + WRITE(6,*) 'Number of iterations : ',ITER + WRITE(6,*) 'Error on exit : ',ERR + WRITE(6,*) 'INFO on exit : ',IERR + END IF + + ! + ! Cleanup storage and exit + ! + CALL F90_PSDSFREE(B,DESC_A) + CALL F90_PSDSFREE(X,DESC_A) +!!$ CALL F90_PSDSFREE(D,DESC_A) + + CALL F90_PSSPFREE(A,DESC_A) +!!$ CALL F90_PSSPFREE(L,DESC_A) +!!$ CALL F90_PSSPFREE(U,DESC_A) + CALL F90_PSDSCFREE(DESC_A,info) + + CALL BLACS_GRIDEXIT(ICONTXT) + CALL BLACS_EXIT(0) + + STOP + +CONTAINS + ! + ! Get iteration parameters from the command line + ! + SUBROUTINE GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + integer :: icontxt + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET, ISTOPC,ITMAX,ITRACE,ML + Character*40 :: CHARBUF + INTEGER :: IARGC, NPROW, NPCOL, MYPROW, MYPCOL + EXTERNAL IARGC + INTEGER :: INTBUF(10), IP + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + IF (MYPROW==0) THEN + READ(*,*) IP + IF (IP.GE.3) THEN + READ(*,*) CMETHD + READ(*,*) PREC + READ(*,*) AFMT + + ! Convert strings in array + DO I = 1, LEN(CMETHD) + INTBUF(I) = IACHAR(CMETHD(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(PREC) + INTBUF(I) = IACHAR(PREC(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(AFMT) + INTBUF(I) = IACHAR(AFMT(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + READ(*,*) IDIM + IF (IP.GE.4) THEN + READ(*,*) ISTOPC + ELSE + ISTOPC=1 + ENDIF + IF (IP.GE.5) THEN + READ(*,*) ITMAX + ELSE + ITMAX=500 + ENDIF + IF (IP.GE.6) THEN + READ(*,*) ITRACE + ELSE + ITRACE=-1 + ENDIF + IF (IP.GE.7) THEN + READ(*,*) ML + ELSE + ML=1 + ENDIF + ! Broadcast parameters to all processors + + INTBUF(1) = IDIM + INTBUF(2) = ISTOPC + INTBUF(3) = ITMAX + INTBUF(4) = ITRACE + INTBUF(5) = ML + CALL IGEBS2D(ICONTXT,'ALL',' ',5,1,INTBUF,5) + + WRITE(6,*)'Solving matrix: ELL1' + WRITE(6,*)'on grid',IDIM,'x',IDIM,'x',IDIM + WRITE(6,*)' with BLOCK data distribution, NP=',Np,& + & ' Preconditioner=',PREC,& + & ' Iterative methd=',CMETHD + ELSE + ! Wrong number of parameter, print an error message and exit + CALL PR_USAGE(0) + CALL BLACS_ABORT(ICONTXT,-1) + STOP 1 + ENDIF + ELSE + ! Receive Parameters + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + CMETHD(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + PREC(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 5 + AFMT(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',5,1,INTBUF,5,0,0) + IDIM = INTBUF(1) + ISTOPC = INTBUF(2) + ITMAX = INTBUF(3) + ITRACE = INTBUF(4) + ML = INTBUF(5) + END IF + RETURN + + END SUBROUTINE GET_PARMS + ! + ! Print an error message + ! + SUBROUTINE PR_USAGE(IOUT) + INTEGER :: IOUT + WRITE(IOUT,*)'Incorrect parameter(s) found' + WRITE(IOUT,*)' Usage: pde90 methd prec dim & + &[istop itmax itrace]' + WRITE(IOUT,*)' Where:' + WRITE(IOUT,*)' methd: CGSTAB TFQMR CGS' + WRITE(IOUT,*)' prec : ILU DIAGSC NONE' + WRITE(IOUT,*)' dim number of points along each axis' + WRITE(IOUT,*)' the size of the resulting linear ' + WRITE(IOUT,*)' system is dim**3' + WRITE(IOUT,*)' istop Stopping criterion 1, 2 or 3 [1] ' + WRITE(IOUT,*)' itmax Maximum number of iterations [500] ' + WRITE(IOUT,*)' itrace 0 (no tracing, default) or ' + WRITE(IOUT,*)' >= 0 do tracing every ITRACE' + WRITE(IOUT,*)' iterations ' + END SUBROUTINE PR_USAGE + +! +! Subroutine to allocate and fill in the coefficient matrix and +! the RHS. +! + SUBROUTINE CREATE_MATRIX(IDIM,A,B,T,DESC_A,PARTS,ICONTXT,AFMT) + ! + ! Discretize the partial diferential equation + ! + ! b1 dd(u) b2 dd(u) a1 d(u) a2 d(u) + ! - ------ - ------ - ----- - ------ + a4 u + ! dxdx dydy dx dy + ! + ! = 0 + ! + ! boundary condition: Dirichlet + ! 0< x,y<1 + ! + ! u(x,y)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ + ! -u(x+1,y,z)b1-u(x,y+1,z)b2 + + USE TYPESP + USE TYPEDESC + USE F90TOOLS + Implicit None + INTEGER :: IDIM + integer, parameter :: nbmax=10 + Real(Kind(1.D0)),Pointer :: B(:),T(:) + Type (desc_type) :: DESC_A + Integer :: ICONTXT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PARTS + END INTERFACE ! Local variables + Type(D_SPMAT) :: A + Real(Kind(1.d0)) :: ZT(NBMAX),GLOB_X,GLOB_Y,GLOB_Z + Integer :: M,N,NNZ,GLOB_ROW,J + Type (D_SPMAT) :: ROW_MAT + Integer :: X,Y,Z,COUNTER,IA,I,INDX_OWNER + INTEGER :: NPROW,NPCOL,MYPROW,MYPCOL + Integer :: ELEMENT + INTEGER :: INFO, NV, INV + INTEGER, ALLOCATABLE :: PRV(:) + INTEGER, pointer :: ierrv(:) + Real(Kind(1.d0)), pointer :: DWORK(:) + INTEGER,POINTER :: IWORK(:) + character :: afmt*5 + ! deltah dimension of each grid cell + ! deltat discretization time + Real(Kind(1.D0)) :: DELTAH + Real(Kind(1.d0)),Parameter :: RHS=0.d0,ONE=1.d0,ZERO=0.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, T3, TINS + Real(Kind(1.d0)) :: a1, a2, a3, a4, b1, b2, b3 + external mpi_wtime,a1, a2, a3, a4, b1, b2, b3 + integer :: nb, ir1, ir2, ipr + logical :: own + ! common area + + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + DELTAH = 1.D0/(IDIM-1) + + ! Initialize array descriptor and sparse matrix storage. Provide an + ! estimate of the number of non zeroes + CALL SETERR(2) + allocate(ierrv(6)) + + ierrv(:) = 0 + M = IDIM*IDIM + N = M + NNZ = ((N*6)/(NPROW*NPCOL)) + write(*,*) 'Size: n ',n + Call F90_PSDSCALL(N,N,PARTS,ICONTXT,IERRV,DESC_A) + write(*,*) 'Allocating A : nnz',nnz + Call F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZ) + ! Define RHS from boundary conditions; also build initial guess + write(*,*) 'Allocating B' + Call F90_PSDSALL(N,B,IERRV,DESC_A) + write(*,*) 'Allocating T' + Call F90_PSDSALL(N,T,IERRV,DESC_A) + + ! We build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. Might be extended to generate + ! a bunch of rows per call. + ! + ROW_MAT%DESCRA(1:1) = 'G' + ROW_MAT%FIDA = 'CSR' + write(*,*) 'Allocating ROW_MAT',20*nbmax + ALLOCATE(ROW_MAT%ASPK(20*nbmax),ROW_MAT%IA1(20*nbmax),& + &ROW_MAT%IA2(20*nbmax),PRV(NPROW),stat=info) + if (info.ne.0 ) then + write(*,*) 'Memory allocation error' + call blacs_abort(icontxt,-1) + endif + + TINS = 0.D0 + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + ! Loop over rows belonging to current process in a BLOCK + ! distribution. + + Z=0 + ROW_MAT%IA2(1)=1 + DO GLOB_ROW = 1, N + CALL PARTS(GLOB_ROW,N,NPROW,PRV,NV) + DO INV = 1, NV + INDX_OWNER = PRV(INV) + IF (INDX_OWNER == MYPROW) THEN + ! Local matrix pointer + ELEMENT=1 + ! Compute gridpoint Coordinates + IF (MOD(GLOB_ROW,(IDIM)).EQ.0) THEN + X = GLOB_ROW/(IDIM) + ELSE + X = GLOB_ROW/(IDIM)+1 + ENDIF + Y = GLOB_ROW-(X-1)*IDIM + ! GLOB_X, GLOB_Y, GLOB_X coordinates + GLOB_X=X*DELTAH + GLOB_Y=Y*DELTAH + GLOB_Z=Z*DELTAH + + + ! Check on boundary points +!!$ IF (X.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Y.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Z.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (X.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Y.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Z.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE + zt(1) = 0.d0 + ! Internal point: build discretization + ! + ! Term depending on (x-1,y,z) + ! + if (x==1) then + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-2)*IDIM+(Y) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y-1,z) + if (y==1) then + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y-1) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y,z-1) +!!$ if (z==1) then +!!$ ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& +!!$ & -A3(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) +!!$ else +!!$ ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& +!!$ & -A3(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z-1) +!!$ ELEMENT=ELEMENT+1 +!!$ endif + ! Term depending on (x,y,z) + ROW_MAT%ASPK(ELEMENT)=2*B1(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B2(GLOB_X,GLOB_Y,GLOB_Z)& + & +A1(GLOB_X,GLOB_Y,GLOB_Z)& + & +A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z+1) +!!$ if (z==idim) then +!!$ ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) +!!$ else +!!$ ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) +!!$ ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& +!!$ & DELTAH) +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z+1) +!!$ ELEMENT=ELEMENT+1 +!!$ endif + ! Term depending on (x,y+1,z) + if (y==idim) then + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM+(Y+1) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x+1,y,z) + if (x= 0 do tracing every itrace' + write(iout,*)' iterations ' + end subroutine pr_usage + +! +! subroutine to allocate and fill in the coefficient matrix and +! the rhs. +! + subroutine create_matrix(idim,a,b,t,desc_a,parts,icontxt,afmt,info) + ! + ! discretize the partial diferential equation + ! + ! b1 dd(u) b2 dd(u) b3 dd(u) a1 d(u) a2 d(u) a3 d(u) + ! - ------ - ------ - ------ - ----- - ------ - ------ + a4 u + ! dxdx dydy dzdz dx dy dz + ! + ! = 0 + ! + ! boundary condition: dirichlet + ! 0< x,y,z<1 + ! + ! u(x,y,z)(2b1+2b2+2b3+a1+a2+a3)+u(x-1,y,z)(-b1-a1)+u(x,y-1,z)(-b2-a2)+ + ! + u(x,y,z-1)(-b3-a3)-u(x+1,y,z)b1-u(x,y+1,z)b2-u(x,y,z+1)b3 + + use typesp + use typedesc + use f90tools + use f90methd + implicit none + integer :: idim + integer, parameter :: nbmax=10 + real(kind(1.d0)),pointer :: b(:),t(:) + type (desc_type) :: desc_a + integer :: icontxt, info + character :: afmt*5 + interface + ! .....user passed subroutine..... + subroutine parts(global_indx,n,np,pv,nv) + implicit none + integer, intent(in) :: global_indx, n, np + integer, intent(out) :: nv + integer, intent(out) :: pv(*) + end subroutine parts + end interface ! local variables + type(d_spmat) :: a + real(kind(1.d0)) :: zt(nbmax),glob_x,glob_y,glob_z + integer :: m,n,nnz,glob_row,j + type (d_spmat) :: row_mat + integer :: x,y,z,counter,ia,i,indx_owner + integer :: nprow,npcol,myprow,mypcol + integer :: element + integer :: nv, inv + integer, allocatable :: prv(:) + integer, pointer :: ierrv(:) + real(kind(1.d0)), pointer :: dwork(:) + integer,pointer :: iwork(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(kind(1.d0)) :: deltah + real(kind(1.d0)),parameter :: rhs=0.d0,one=1.d0,zero=0.d0 + real(kind(1.d0)) :: mpi_wtime, t1, t2, t3, tins + real(kind(1.d0)) :: a1, a2, a3, a4, b1, b2, b3 + external mpi_wtime,a1, a2, a3, a4, b1, b2, b3 + integer :: nb, ir1, ir2, ipr, err_act + logical :: own + ! common area + + character(len=20) :: name, ch_err + + info = 0 + name = 'create_matrix' + call psb_erractionsave(err_act) + + call blacs_gridinfo(icontxt, nprow, npcol, myprow, mypcol) + + deltah = 1.d0/(idim-1) + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = idim*idim*idim + n = m + nnz = ((n*9)/(nprow*npcol)) + write(*,*) 'size: n ',n + call psb_dscall(n,n,parts,icontxt,desc_a,info) + write(*,*) 'allocating a : nnz',nnz + call f90_psspall(a,desc_a,info,nnz=nnz) + ! define rhs from boundary conditions; also build initial guess + write(*,*) 'allocating b' + call f90_psdsall(n,b,desc_a,info) + write(*,*) 'allocating t' + call f90_psdsall(n,t,desc_a,info) + if(info.ne.0) then + info=4010 + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + row_mat%descra(1:1) = 'G' + row_mat%fida = 'CSR' + write(*,*) 'allocating row_mat',20*nbmax + allocate(row_mat%aspk(20*nbmax),row_mat%ia1(20*nbmax),& + &row_mat%ia2(20*nbmax),prv(nprow),stat=info) + if (info.ne.0 ) then + info=4000 + call psb_errpush(info,name) + goto 9999 + endif + + tins = 0.d0 + call blacs_barrier(icontxt,'ALL') + t1 = mpi_wtime() + + ! loop over rows belonging to current process in a block + ! distribution. + + row_mat%ia2(1)=1 + do glob_row = 1, n + call parts(glob_row,n,nprow,prv,nv) + do inv = 1, nv + indx_owner = prv(inv) + if (indx_owner == myprow) then + ! local matrix pointer + element=0 + ! compute gridpoint coordinates + if (mod(glob_row,(idim*idim)).eq.0) then + x = glob_row/(idim*idim) + else + x = glob_row/(idim*idim)+1 + endif + if (mod((glob_row-(x-1)*idim*idim),idim).eq.0) then + y = (glob_row-(x-1)*idim*idim)/idim + else + y = (glob_row-(x-1)*idim*idim)/idim+1 + endif + z = glob_row-(x-1)*idim*idim-(y-1)*idim + ! glob_x, glob_y, glob_x coordinates + glob_x=x*deltah + glob_y=y*deltah + glob_z=z*deltah + + ! check on boundary points + if (x.eq.1) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else if (y.eq.1) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else if (z.eq.1) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else if (x.eq.idim) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else if (y.eq.idim) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else if (z.eq.idim) then + element=element+1 + row_mat%aspk(element)=one + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + else + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + element=element+1 + row_mat%aspk(element)=-b1(glob_x,glob_y,glob_z)& + & -a1(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-2)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + ! term depending on (x,y-1,z) + element=element+1 + row_mat%aspk(element)=-b2(glob_x,glob_y,glob_z)& + & -a2(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-1)*idim*idim+(y-2)*idim+(z) + row_mat%ia1(element)=glob_row + ! term depending on (x,y,z-1) + element=element+1 + row_mat%aspk(element)=-b3(glob_x,glob_y,glob_z)& + & -a3(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z-1) + row_mat%ia1(element)=glob_row + ! term depending on (x,y,z) + element=element+1 + row_mat%aspk(element)=2*b1(glob_x,glob_y,glob_z)& + & +2*b2(glob_x,glob_y,glob_z)& + & +2*b3(glob_x,glob_y,glob_z)& + & +a1(glob_x,glob_y,glob_z)& + & +a2(glob_x,glob_y,glob_z)& + & +a3(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + ! term depending on (x,y,z+1) + element=element+1 + row_mat%aspk(element)=-b1(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-1)*idim*idim+(y-1)*idim+(z+1) + row_mat%ia1(element)=glob_row + ! term depending on (x,y+1,z) + element=element+1 + row_mat%aspk(element)=-b2(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x-1)*idim*idim+(y)*idim+(z) + row_mat%ia1(element)=glob_row + ! term depending on (x+1,y,z) + element=element+1 + row_mat%aspk(element)=-b3(glob_x,glob_y,glob_z) + row_mat%aspk(element) = row_mat%aspk(element)/(deltah*& + & deltah) + row_mat%ia2(element)=(x)*idim*idim+(y-1)*idim+(z) + row_mat%ia1(element)=glob_row + endif + row_mat%m=1 + row_mat%k=n + ! row_mat%ia2(2)=element + ! ia== global row index + ia=glob_row +!!$ ia=(x-1)*idim*idim+(y-1)*idim+(z) +!!$ write(0,*) 'inserting row ',ia,' on proc',myprow + t3 = mpi_wtime() + call psb_spins(element,row_mat%ia1,row_mat%ia2,row_mat%aspk,a,desc_a,info) + if(info.ne.0) exit + tins = tins + (mpi_wtime()-t3) + ! build rhs + if (x==1) then + glob_y=(y-idim/2)*deltah + glob_z=(z-idim/2)*deltah + zt(1) = exp(-glob_y**2-glob_z**2) + else if ((y==1).or.(y==idim).or.(z==1).or.(z==idim)) then + glob_x=3*(x-1)*deltah + glob_y=(y-idim/2)*deltah + glob_z=(z-idim/2)*deltah + zt(1) = exp(-glob_y**2-glob_z**2)*exp(-glob_x) + else + zt(1) = 0.d0 + endif + call f90_psdsins(1,b,ia,zt(1:1),desc_a,info) + if(info.ne.0) exit + zt(1)=0.d0 + call f90_psdsins(1,t,ia,zt(1:1),desc_a,info) + if(info.ne.0) exit + end if + end do + end do + + call blacs_barrier(icontxt,'ALL') + t2 = mpi_wtime() + + if(info.ne.0) then + info=4010 + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + write(*,*) ' pspins time',tins + write(*,*) ' insert time',(t2-t1) + + deallocate(row_mat%aspk,row_mat%ia1,row_mat%ia2) + + write(*,*) 'calling spasb' + call blacs_barrier(icontxt,'ALL') + t1 = mpi_wtime() + call psb_dscasb(desc_a,info) + call psb_spasb(a,desc_a,info,dup=1,afmt=afmt) + call blacs_barrier(icontxt,'ALL') + t2 = mpi_wtime() + if(info.ne.0) then + info=4010 + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + write(0,*) ' assembly time',(t2-t1),' ',a%fida(1:4) + + call f90_psdsasb(b,desc_a,info) + call f90_psdsasb(t,desc_a,info) + if(info.ne.0) then + info=4010 + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if (myprow.eq.0) then + write(0,*) ' end create_matrix' + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.act_abort) then + call psb_error(icontxt) + return + end if + return + end subroutine create_matrix +end program pde90 +! +! functions parametrizing the differential equation +! +function a1(x,y,z) + real(kind(1.d0)) :: a1 + real(kind(1.d0)) :: x,y,z + a1=1.d0 +end function a1 +function a2(x,y,z) + real(kind(1.d0)) :: a2 + real(kind(1.d0)) :: x,y,z + a2=2.d1*y +end function a2 +function a3(x,y,z) + real(kind(1.d0)) :: a3 + real(kind(1.d0)) :: x,y,z + a3=1.d0 +end function a3 +function a4(x,y,z) + real(kind(1.d0)) :: a4 + real(kind(1.d0)) :: x,y,z + a4=1.d0 +end function a4 +function b1(x,y,z) + real(kind(1.d0)) :: b1 + real(kind(1.d0)) :: x,y,z + b1=1.d0 +end function b1 +function b2(x,y,z) + real(kind(1.d0)) :: b2 + real(kind(1.d0)) :: x,y,z + b2=1.d0 +end function b2 +function b3(x,y,z) + real(kind(1.d0)) :: b3 + real(kind(1.d0)) :: x,y,z + b3=1.d0 +end function b3 + + diff --git a/test/pargen/ppde90log.f90 b/test/pargen/ppde90log.f90 new file mode 100644 index 00000000..11564d1b --- /dev/null +++ b/test/pargen/ppde90log.f90 @@ -0,0 +1,682 @@ +! +! This sample program shows how to build and solve a sparse linear +! +! The program solves a linear system based on the partial differential +! equation +! +! +! +! the equation generated is: +! b1 d d (u) b2 d d (u) a1 d (u)) a2 d (u))) +! - ------ - ------ + ----- + ------ + a3 u = 0 +! dx dx dy dy dx dy +! +! +! with Dirichlet boundary conditions on the unit cube +! +! 0<=x,y,z<=1 +! +! The equation is discretized with finite differences and uniform stepsize; +! the resulting discrete equation is +! +! ( u(x,y,z)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ +! -u(x+1,y)b1-u(x,y+1)b2)*(1/h**2) +! +! Example taken from: C.T.Kelley +! Iterative Methods for Linear and Nonlinear Equations +! SIAM 1995 +! +! +! In this sample program the index space of the discretized +! computational domain is first numbered sequentially in a standard way, +! then the corresponding vector is distributed according to an HPF BLOCK +! distribution directive. +! +! Boundary conditions are set in a very simple way, by adding +! equations of the form +! +! u(x,y) = rhs(x,y) +! +Program PDE90 + USE TYPESP + USE TYPEDESC + USE TYPEPREC + USE F90TOOLS + USE F90METHD + USE F90PREC + use mpi + Implicit none + + interface + !.....user passed subroutine..... + subroutine part_block(glob_index,n,np,pv,nv) + INTEGER, INTENT(IN) :: GLOB_INDEX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + end subroutine part_block + end interface + ! input parameters + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET + + ! Miscellaneous + Integer, Parameter :: IZERO=0, IONE=1 + Character, PARAMETER :: ORDER='R' + INTEGER :: IARGC,CONVERT_DESCR,dim, CHECK_DESCR + REAL(KIND(1.D0)), PARAMETER :: DZERO = 0.D0, ONE = 1.D0 + REAL(KIND(1.D0)) :: T1, T2, TPREC, TSOLVE, T3, T4 +!!$ EXTERNAL MPI_WTIME + integer mpe_log_get_event_number,mpe_Describe_state,mpe_log_event + + ! Sparse Matrix and preconditioner + TYPE(D_SPMAT) :: A, L, U, H + TYPE(PREC_DATA) :: PRE + ! Descriptor + TYPE(desc_type) :: DESC_A, DESC_A_OUT + ! Dense Matrices + REAL(KIND(1.d0)), POINTER :: B(:), X(:), D(:), LD(:) + INTEGER, pointer :: WORK(:) + ! BLACS parameters + INTEGER :: nprow, npcol, icontxt, iam, np, myprow, mypcol + + ! Solver parameters + INTEGER :: ITER, ITMAX,IERR,ITRACE, METHD,IPREC, ISTOPC,& + & IPARM(20), ML + REAL(KIND(1.D0)) :: ERR, EPS, RPARM(20) + + ! Other variables + INTEGER :: I,INFO,iprecb,iprece,islvb,islve + INTEGER :: INTERNAL, M,II + + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICONTXT) + iprecb = mpe_log_get_event_number() + iprece = mpe_log_get_event_number() + islvb = mpe_log_get_event_number() + islve = mpe_log_get_event_number() + if (iam==0) then + info = mpe_describe_state(iprecb,iprece,"Preconditioner","OrangeRed") + info = mpe_describe_state(islvb,islve,"Solver","DarkGreen") + endif + + + ! Rectangular Grid, P x 1 + + CALL BLACS_GRIDINIT(ICONTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + + ! + ! Get parameters + ! + CALL GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + + ! + ! Allocate and fill in the coefficient matrix, RHS and initial guess + ! + + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL CREATE_MATRIX(IDIM,A,B,X,DESC_A,PART_BLOCK,ICONTXT,AFMT) + T2 = MPI_WTIME() - T1 + + DIM=SIZE(A%ASPK) + + ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),& + & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),& + & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),& + & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),& + & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),& + & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),& + & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),& + & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(1024)) + check_descr=15 +! work(5)=9 +!!$ WRITE(0,*)'CALLING VERIFY' +!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,& +!!$ & DESC_A_OUT,WORK) +!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR + + deallocate(work) + + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + IF (IAM.EQ.0) Write(6,*) 'Matrix creation Time : ',T2 + + ! + ! Prepare the preconditioner. + ! + write(0,*)'PRECONDIZIONATORE=',prec + SELECT CASE (PREC) + CASE ('SCHW6') + IPREC = 6 + CASE ('SCHW5') + IPREC = 5 + CASE ('SCHW4') + IPREC = 4 + CASE ('SCHW3') + IPREC = 3 + CASE ('ILU') + IPREC = 2 + CASE ('DIAGSC') + IPREC = 1 + CASE ('NONE') + IPREC = 0 + CASE DEFAULT + WRITE(0,*) 'Unknown preconditioner' + CALL BLACS_ABORT(ICONTXT,-1) + END SELECT + pre%prec=iprec + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + info = MPE_Log_event( iprecb, 0, "start Precond" ) + + CALL PRECONDITIONER(A,PRE,DESC_A,IRET) +!!$ CALL PRECONDITIONER(IPREC,A,L,U,D,DESC_A,IRET) + info = MPE_Log_event( iprece, 0, "end Precond" ) + TPREC = MPI_WTIME()-T1 + + CALL DGAMX2D(icontxt,'A',' ',IONE, IONE,TPREC,IONE,t1,t1,-1,-1,-1) + + IF (IAM.EQ.0) WRITE(6,*) 'Preconditioner Time : ',TPREC + + IF (IRET.NE.0) THEN + WRITE(0,*) 'Error on preconditioner',IRET + CALL BLACS_ABORT(ICONTXT,-1) + STOP + END IF + + ! + ! Iterative method parameters + ! + write(*,*) 'Calling Iterative method', size(b),ml + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + EPS = 1.D-9 + info = MPE_Log_event( islvb, 0, "start Solver" ) + IF (CMETHD.EQ.'BICGSTAB') THEN + CALL F90_BICGSTAB(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') THEN +!!$ CALL F90_BICG(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'CGS') THEN +!!$ CALL F90_CGS(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICGSTABL') THEN +!!$ CALL F90_BICGSTABL(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ELSE + write(0,*) 'Unknown method ',cmethd + end IF + info = MPE_Log_event( islve, 0, "end Solver" ) + CALL BLACS_BARRIER(ICONTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + + IF (IAM.EQ.0) THEN + WRITE(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/ITER + WRITE(6,*) 'Number of iterations : ',ITER + WRITE(6,*) 'Error on exit : ',ERR + WRITE(6,*) 'INFO on exit : ',IERR + END IF + + ! + ! Cleanup storage and exit + ! + CALL F90_PSDSFREE(B,DESC_A) + CALL F90_PSDSFREE(X,DESC_A) +!!$ CALL F90_PSDSFREE(D,DESC_A) + + CALL F90_PSSPFREE(A,DESC_A) +!!$ CALL F90_PSSPFREE(L,DESC_A) +!!$ CALL F90_PSSPFREE(U,DESC_A) + CALL F90_PSDSCFREE(DESC_A,info) + + CALL BLACS_GRIDEXIT(ICONTXT) + CALL BLACS_EXIT(0) + + STOP + +CONTAINS + ! + ! Get iteration parameters from the command line + ! + SUBROUTINE GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + integer :: icontxt + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET, ISTOPC,ITMAX,ITRACE,ML + Character*40 :: CHARBUF + INTEGER :: IARGC, NPROW, NPCOL, MYPROW, MYPCOL + EXTERNAL IARGC + INTEGER :: INTBUF(10), IP + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + IF (MYPROW==0) THEN + READ(*,*) IP + IF (IP.GE.3) THEN + READ(*,*) CMETHD + READ(*,*) PREC + READ(*,*) AFMT + + ! Convert strings in array + DO I = 1, LEN(CMETHD) + INTBUF(I) = IACHAR(CMETHD(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(PREC) + INTBUF(I) = IACHAR(PREC(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(AFMT) + INTBUF(I) = IACHAR(AFMT(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + READ(*,*) IDIM + IF (IP.GE.4) THEN + READ(*,*) ISTOPC + ELSE + ISTOPC=1 + ENDIF + IF (IP.GE.5) THEN + READ(*,*) ITMAX + ELSE + ITMAX=500 + ENDIF + IF (IP.GE.6) THEN + READ(*,*) ITRACE + ELSE + ITRACE=-1 + ENDIF + IF (IP.GE.7) THEN + READ(*,*) ML + ELSE + ML=1 + ENDIF + ! Broadcast parameters to all processors + + INTBUF(1) = IDIM + INTBUF(2) = ISTOPC + INTBUF(3) = ITMAX + INTBUF(4) = ITRACE + INTBUF(5) = ML + CALL IGEBS2D(ICONTXT,'ALL',' ',5,1,INTBUF,5) + + WRITE(6,*)'Solving matrix: ELL1' + WRITE(6,*)'on grid',IDIM,'x',IDIM,'x',IDIM + WRITE(6,*)' with BLOCK data distribution, NP=',Np,& + & ' Preconditioner=',PREC,& + & ' Iterative methd=',CMETHD + ELSE + ! Wrong number of parameter, print an error message and exit + CALL PR_USAGE(0) + CALL BLACS_ABORT(ICONTXT,-1) + STOP 1 + ENDIF + ELSE + ! Receive Parameters + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + CMETHD(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + PREC(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 5 + AFMT(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',5,1,INTBUF,5,0,0) + IDIM = INTBUF(1) + ISTOPC = INTBUF(2) + ITMAX = INTBUF(3) + ITRACE = INTBUF(4) + ML = INTBUF(5) + END IF + RETURN + + END SUBROUTINE GET_PARMS + ! + ! Print an error message + ! + SUBROUTINE PR_USAGE(IOUT) + INTEGER :: IOUT + WRITE(IOUT,*)'Incorrect parameter(s) found' + WRITE(IOUT,*)' Usage: pde90 methd prec dim & + &[istop itmax itrace]' + WRITE(IOUT,*)' Where:' + WRITE(IOUT,*)' methd: CGSTAB TFQMR CGS' + WRITE(IOUT,*)' prec : ILU DIAGSC NONE' + WRITE(IOUT,*)' dim number of points along each axis' + WRITE(IOUT,*)' the size of the resulting linear ' + WRITE(IOUT,*)' system is dim**3' + WRITE(IOUT,*)' istop Stopping criterion 1, 2 or 3 [1] ' + WRITE(IOUT,*)' itmax Maximum number of iterations [500] ' + WRITE(IOUT,*)' itrace 0 (no tracing, default) or ' + WRITE(IOUT,*)' >= 0 do tracing every ITRACE' + WRITE(IOUT,*)' iterations ' + END SUBROUTINE PR_USAGE + +! +! Subroutine to allocate and fill in the coefficient matrix and +! the RHS. +! + SUBROUTINE CREATE_MATRIX(IDIM,A,B,T,DESC_A,PARTS,ICONTXT,AFMT) + ! + ! Discretize the partial diferential equation + ! + ! b1 dd(u) b2 dd(u) b3 dd(u) a1 d(u) a2 d(u) a3 d(u) + ! - ------ - ------ - ------ - ----- - ------ - ------ + a4 u + ! dxdx dydy dzdz dx dy dz + ! + ! = 0 + ! + ! boundary condition: Dirichlet + ! 0< x,y,z<1 + ! + ! u(x,y,z)(2b1+2b2+2b3+a1+a2+a3)+u(x-1,y,z)(-b1-a1)+u(x,y-1,z)(-b2-a2)+ + ! + u(x,y,z-1)(-b3-a3)-u(x+1,y,z)b1-u(x,y+1,z)b2-u(x,y,z+1)b3 + + USE TYPESP + USE TYPEDESC + USE F90TOOLS + USE F90METHD + Implicit None + INTEGER :: IDIM + integer, parameter :: nbmax=10 + Real(Kind(1.D0)),Pointer :: B(:),T(:) + Type (desc_type) :: DESC_A + Integer :: ICONTXT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PARTS + END INTERFACE ! Local variables + Type(D_SPMAT) :: A + Real(Kind(1.d0)) :: ZT(NBMAX),GLOB_X,GLOB_Y,GLOB_Z + Integer :: M,N,NNZ,GLOB_ROW,J + Type (D_SPMAT) :: ROW_MAT + Integer :: X,Y,Z,COUNTER,IA,I,INDX_OWNER + INTEGER :: NPROW,NPCOL,MYPROW,MYPCOL + Integer :: ELEMENT + INTEGER :: INFO, NV, INV + INTEGER, ALLOCATABLE :: PRV(:) + INTEGER, pointer :: ierrv(:) + Real(Kind(1.d0)), pointer :: DWORK(:) + INTEGER,POINTER :: IWORK(:) + character :: afmt*5 + ! deltah dimension of each grid cell + ! deltat discretization time + Real(Kind(1.D0)) :: DELTAH + Real(Kind(1.d0)),Parameter :: RHS=0.d0,ONE=1.d0,ZERO=0.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, T3, TINS + Real(Kind(1.d0)) :: a1, a2, a3, a4, b1, b2, b3 + external mpi_wtime,a1, a2, a3, a4, b1, b2, b3 + integer :: nb, ir1, ir2, ipr + logical :: own + ! common area + + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + DELTAH = 1.D0/(IDIM-1) + + ! Initialize array descriptor and sparse matrix storage. Provide an + ! estimate of the number of non zeroes + CALL SETERR(2) + allocate(ierrv(6)) + + ierrv(:) = 0 + M = IDIM*IDIM*IDIM + N = M + NNZ = ((N*9)/(NPROW*NPCOL)) + write(*,*) 'Size: n ',n + Call F90_PSDSCALL(N,N,PARTS,ICONTXT,IERRV,DESC_A) + write(*,*) 'Allocating A : nnz',nnz + Call F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZ) + ! Define RHS from boundary conditions; also build initial guess + write(*,*) 'Allocating B' + Call F90_PSDSALL(N,B,IERRV,DESC_A) + write(*,*) 'Allocating T' + Call F90_PSDSALL(N,T,IERRV,DESC_A) + + ! We build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. Might be extended to generate + ! a bunch of rows per call. + ! + ROW_MAT%DESCRA(1:1) = 'G' + ROW_MAT%FIDA = 'CSR' + write(*,*) 'Allocating ROW_MAT',20*nbmax + ALLOCATE(ROW_MAT%ASPK(20*nbmax),ROW_MAT%IA1(20*nbmax),& + &ROW_MAT%IA2(20*nbmax),PRV(NPROW),stat=info) + if (info.ne.0 ) then + write(*,*) 'Memory allocation error' + call blacs_abort(icontxt,-1) + endif + + TINS = 0.D0 + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + ! Loop over rows belonging to current process in a BLOCK + ! distribution. + + ROW_MAT%IA2(1)=1 + DO GLOB_ROW = 1, N + CALL PARTS(GLOB_ROW,N,NPROW,PRV,NV) + DO INV = 1, NV + INDX_OWNER = PRV(INV) + IF (INDX_OWNER == MYPROW) THEN + ! Local matrix pointer + ELEMENT=1 + ! Compute gridpoint Coordinates + IF (MOD(GLOB_ROW,(IDIM*IDIM)).EQ.0) THEN + X = GLOB_ROW/(IDIM*IDIM) + ELSE + X = GLOB_ROW/(IDIM*IDIM)+1 + ENDIF + IF (MOD((GLOB_ROW-(X-1)*IDIM*IDIM),IDIM).EQ.0) THEN + Y = (GLOB_ROW-(X-1)*IDIM*IDIM)/IDIM + ELSE + Y = (GLOB_ROW-(X-1)*IDIM*IDIM)/IDIM+1 + ENDIF + Z = GLOB_ROW-(X-1)*IDIM*IDIM-(Y-1)*IDIM + ! GLOB_X, GLOB_Y, GLOB_X coordinates + GLOB_X=X*DELTAH + GLOB_Y=Y*DELTAH + GLOB_Z=Z*DELTAH + + ! Check on boundary points + IF (X.EQ.1) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE IF (Y.EQ.1) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE IF (Z.EQ.1) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE IF (X.EQ.IDIM) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE IF (Y.EQ.IDIM) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE IF (Z.EQ.IDIM) THEN + ROW_MAT%ASPK(ELEMENT)=ONE + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ELSE + ! Internal point: build discretization + ! + ! Term depending on (x-1,y,z) + ! + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-2)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y-1,z) + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-2)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z-1) + ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& + & -A3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z-1) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z) + ROW_MAT%ASPK(ELEMENT)=2*B1(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B2(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B3(GLOB_X,GLOB_Y,GLOB_Z)& + & +A1(GLOB_X,GLOB_Y,GLOB_Z)& + & +A2(GLOB_X,GLOB_Y,GLOB_Z)& + & +A3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z+1) + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z+1) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y+1,z) + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ! Term depending on (x+1,y,z) + ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ENDIF + ROW_MAT%M=1 + ROW_MAT%K=N + ROW_MAT%IA2(2)=ELEMENT + ! IA== GLOBAL ROW INDEX + IA=GLOB_ROW +!!$ IA=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ write(0,*) 'Inserting row ',ia,' On proc',myprow + T3 = MPI_WTIME() + CALL F90_PSSPINS(A,IA,1,ROW_MAT,IERRV,DESC_A) + if (ierrv(1).ne.0) then + write(0,*) 'On row ',ia,' IERRV:',ierrv(:) + endif + TINS = TINS + (MPI_WTIME()-T3) + ! Build RHS + IF (X==1) THEN + GLOB_Y=(Y-IDIM/2)*DELTAH + GLOB_Z=(Z-IDIM/2)*DELTAH + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2) + ELSE IF ((Y==1).OR.(Y==IDIM).OR.(Z==1).OR.(Z==IDIM)) THEN + GLOB_X=3*(X-1)*DELTAH + GLOB_Y=(Y-IDIM/2)*DELTAH + GLOB_Z=(Z-IDIM/2)*DELTAH + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X) + ELSE + ZT(1) = 0.D0 + ENDIF + CALL F90_PSDSINS(1,B,IA,ZT(1:1),IERRV,DESC_A) + ZT(1)=0.D0 + CALL F90_PSDSINS(1,T,IA,ZT(1:1),IERRV,DESC_A) + END IF + END DO + END DO + + CALL BLACS_BARRIER(ICONTXT,'ALL') + T2 = MPI_WTIME() + + WRITE(*,*) ' pspins time',TINS + WRITE(*,*) ' Insert time',(T2-T1) + + DEALLOCATE(ROW_MAT%ASPK,ROW_MAT%IA1,ROW_MAT%IA2) + + write(*,*) 'Calling SPASB' + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + CALL F90_PSSPASB(A,IERRV,DESC_A,AFMT=AFMT,DUP=2) + + CALL BLACS_BARRIER(ICONTXT,'ALL') + T2 = MPI_WTIME() + + WRITE(0,*) ' Assembly time',(T2-T1),' ',a%fida(1:4) + + CALL F90_PSDSASB(B,IERRV,DESC_A) + CALL F90_PSDSASB(T,IERRV,DESC_A) + IF (MYPROW.EQ.0) THEN + WRITE(0,*) ' End CREATE_MATRIX' + ENDIF + RETURN + + END SUBROUTINE CREATE_MATRIX +END PROGRAM PDE90 +! +! Functions parametrizing the differential equation +! +FUNCTION A1(X,Y,Z) + REAL(KIND(1.D0)) :: A1 + REAL(KIND(1.D0)) :: X,Y,Z + A1=1.D0 +END FUNCTION A1 +FUNCTION A2(X,Y,Z) + REAL(KIND(1.D0)) :: A2 + REAL(KIND(1.D0)) :: X,Y,Z + A2=2.D1*Y +END FUNCTION A2 +FUNCTION A3(X,Y,Z) + REAL(KIND(1.D0)) :: A3 + REAL(KIND(1.D0)) :: X,Y,Z + A3=1.D0 +END FUNCTION A3 +FUNCTION A4(X,Y,Z) + REAL(KIND(1.D0)) :: A4 + REAL(KIND(1.D0)) :: X,Y,Z + A4=1.D0 +END FUNCTION A4 +FUNCTION B1(X,Y,Z) + REAL(KIND(1.D0)) :: B1 + REAL(KIND(1.D0)) :: X,Y,Z + B1=1.D0 +END FUNCTION B1 +FUNCTION B2(X,Y,Z) + REAL(KIND(1.D0)) :: B2 + REAL(KIND(1.D0)) :: X,Y,Z + B2=1.D0 +END FUNCTION B2 +FUNCTION B3(X,Y,Z) + REAL(KIND(1.D0)) :: B3 + REAL(KIND(1.D0)) :: X,Y,Z + B3=1.D0 +END FUNCTION B3 + + diff --git a/test/pargen/ppde90s.f90 b/test/pargen/ppde90s.f90 new file mode 100644 index 00000000..eb1e3d22 --- /dev/null +++ b/test/pargen/ppde90s.f90 @@ -0,0 +1,707 @@ +! +! This sample program shows how to build and solve a sparse linear +! +! The program solves a linear system based on the partial differential +! equation +! +! +! +! the equation generated is: +! b1 d d (u) b2 d d (u) a1 d (u)) a2 d (u))) +! - ------ - ------ + ----- + ------ + a3 u = 0 +! dx dx dy dy dx dy +! +! +! with Dirichlet boundary conditions on the unit cube +! +! 0<=x,y,z<=1 +! +! The equation is discretized with finite differences and uniform stepsize; +! the resulting discrete equation is +! +! ( u(x,y,z)(2b1+2b2+a1+a2)+u(x-1,y)(-b1-a1)+u(x,y-1)(-b2-a2)+ +! -u(x+1,y)b1-u(x,y+1)b2)*(1/h**2) +! +! Example taken from: C.T.Kelley +! Iterative Methods for Linear and Nonlinear Equations +! SIAM 1995 +! +! +! In this sample program the index space of the discretized +! computational domain is first numbered sequentially in a standard way, +! then the corresponding vector is distributed according to an HPF BLOCK +! distribution directive. +! +! Boundary conditions are set in a very simple way, by adding +! equations of the form +! +! u(x,y) = rhs(x,y) +! +Program PDE90 + USE F90SPARSE + Implicit none + + interface + !.....user passed subroutine..... + subroutine part_block(glob_index,n,np,pv,nv) + INTEGER, INTENT(IN) :: GLOB_INDEX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + end subroutine part_block + end interface + ! input parameters + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET + + ! Miscellaneous + Integer, Parameter :: IZERO=0, IONE=1 + Character, PARAMETER :: ORDER='R' + INTEGER :: IARGC,CONVERT_DESCR,dim, CHECK_DESCR + REAL(KIND(1.D0)), PARAMETER :: DZERO = 0.D0, ONE = 1.D0 + REAL(KIND(1.D0)) :: MPI_WTIME, T1, T2, TPREC, TSOLVE, T3, T4 + EXTERNAL MPI_WTIME + + ! Sparse Matrix and preconditioner + TYPE(D_SPMAT) :: A, L, U, H + TYPE(D_PREC) :: PRE + ! Descriptor + TYPE(desc_type) :: DESC_A, DESC_A_OUT + ! Dense Matrices + REAL(KIND(1.d0)), POINTER :: B(:), X(:), D(:),LD(:) + INTEGER, pointer :: WORK(:) + ! BLACS parameters + INTEGER :: nprow, npcol, icontxt, iam, np, myprow, mypcol + + ! Solver parameters + INTEGER :: ITER, ITMAX,IERR,ITRACE, METHD,IPREC, ISTOPC,& + & IPARM(20), ML + REAL(KIND(1.D0)) :: ERR, EPS, RPARM(20) + + ! Other variables + INTEGER :: I,INFO + INTEGER :: INTERNAL, M,II + + ! Initialize BLACS + CALL BLACS_PINFO(IAM, NP) + CALL BLACS_GET(IZERO, IZERO, ICONTXT) + + ! Rectangular Grid, P x 1 + + CALL BLACS_GRIDINIT(ICONTXT, ORDER, NP, IONE) + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + ! + ! Get parameters + ! + CALL GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + + ! + ! Allocate and fill in the coefficient matrix, RHS and initial guess + ! + + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL CREATE_MATRIX(IDIM,A,B,X,DESC_A,PART_BLOCK,ICONTXT,AFMT) + T2 = MPI_WTIME() - T1 + + DIM=SIZE(A%ASPK) + + ALLOCATE(H%ASPK(DIM),H%IA1(DIM),H%IA2(DIM),H%PL(SIZE(A%PL)),& + & H%PL(SIZE(A%PL)),D(SIZE(A%PL)),& + & DESC_A_OUT%MATRIX_DATA(SIZE(DESC_A%MATRIX_DATA)),& + & DESC_A_OUT%HALO_INDEX(SIZE(DESC_A%HALO_INDEX)),& + & DESC_A_OUT%OVRLAP_INDEX(SIZE(DESC_A%OVRLAP_INDEX)),& + & DESC_A_OUT%OVRLAP_ELEM(SIZE(DESC_A%OVRLAP_ELEM)),& + & DESC_A_OUT%LOC_TO_GLOB(SIZE(DESC_A%LOC_TO_GLOB)),& + & DESC_A_OUT%GLOB_TO_LOC(SIZE(DESC_A%GLOB_TO_LOC)), WORK(1024)) + check_descr=15 +! work(5)=9 +!!$ WRITE(0,*)'CALLING VERIFY' +!!$ CALL F90_PSVERIFY(D,A,DESC_A,CHECK_DESCR,CONVERT_DESCR,H,& +!!$ & DESC_A_OUT,WORK) +!!$ WRITE(0,*)'VERIFY DONE',CONVERT_DESCR + + deallocate(work) + + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + IF (IAM.EQ.0) Write(6,*) 'Matrix creation Time : ',T2 + + ! + ! Prepare the preconditioner. + ! + write(0,*)'PRECONDIZIONATORE=',prec + SELECT CASE (PREC) + CASE ('SCHW6') + IPREC = 6 + CASE ('SCHW5') + IPREC = 5 + CASE ('SCHW4') + IPREC = 4 + CASE ('SCHW3') + IPREC = 3 + CASE ('ILU') + IPREC = 2 + CASE ('DIAGSC') + IPREC = 1 + CASE ('NONE') + IPREC = 0 + CASE DEFAULT + WRITE(0,*) 'Unknown preconditioner' + CALL BLACS_ABORT(ICONTXT,-1) + END SELECT + pre%prec=iprec + pre%n_ovr=ml + pre%irenum=0 + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + CALL PRECONDITIONER(A,PRE,DESC_A,IRET) +!!$ CALL PRECONDITIONER(IPREC,A,L,U,D,DESC_A,IRET) + TPREC = MPI_WTIME()-T1 + + CALL DGAMX2D(icontxt,'A',' ',IONE, IONE,TPREC,IONE,t1,t1,-1,-1,-1) + + IF (IAM.EQ.0) WRITE(6,*) 'Preconditioner Time : ',TPREC + + IF (IRET.NE.0) THEN + WRITE(0,*) 'Error on preconditioner',IRET + CALL BLACS_ABORT(ICONTXT,-1) + STOP + END IF + + ! + ! Iterative method parameters + ! + call dcsprt90(80+myprow,a,head='% Local A') + + write(*,*) 'Calling Iterative method', size(b),ml + CALL BLACS_BARRIER(ICONTXT,'All') + T1 = MPI_WTIME() + EPS = 1.D-9 + IF (CMETHD.EQ.'BICGSTAB') THEN + CALL F90_BICGSTAB(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) +!!$ ELSE IF (CMETHD.EQ.'BICG') THEN +!!$ CALL F90_BICG(A,PRE,B,X,EPS,DESC_A,& +!!$ & ITMAX,ITER,ERR,IERR,ITRACE) + ELSE IF (CMETHD.EQ.'CGS') THEN + CALL F90_CGS(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE) + ELSE IF (CMETHD.EQ.'BICGSTABL') THEN + CALL F90_BICGSTABL(A,PRE,B,X,EPS,DESC_A,& + & ITMAX,ITER,ERR,IERR,ITRACE,ML) + ELSE + write(0,*) 'Unknown method ',cmethd + end IF + + CALL BLACS_BARRIER(ICONTXT,'All') + T2 = MPI_WTIME() - T1 + CALL DGAMX2D(ICONTXT,'A',' ',IONE, IONE,T2,IONE,T1,T1,-1,-1,-1) + + IF (IAM.EQ.0) THEN + WRITE(6,*) 'Time to Solve Matrix : ',T2 + WRITE(6,*) 'Time per iteration : ',T2/ITER + WRITE(6,*) 'Number of iterations : ',ITER + WRITE(6,*) 'Error on exit : ',ERR + WRITE(6,*) 'INFO on exit : ',IERR + END IF + + ! + ! Cleanup storage and exit + ! + CALL F90_PSDSFREE(B,DESC_A) + CALL F90_PSDSFREE(X,DESC_A) +!!$ CALL F90_PSDSFREE(D,DESC_A) + + CALL F90_PSSPFREE(A,DESC_A) +!!$ CALL F90_PSSPFREE(L,DESC_A) +!!$ CALL F90_PSSPFREE(U,DESC_A) + CALL F90_PSDSCFREE(DESC_A,info) + + CALL BLACS_GRIDEXIT(ICONTXT) + CALL BLACS_EXIT(0) + + STOP + +CONTAINS + ! + ! Get iteration parameters from the command line + ! + SUBROUTINE GET_PARMS(ICONTXT,CMETHD,PREC,AFMT,IDIM,ISTOPC,ITMAX,ITRACE,ML) + integer :: icontxt + Character :: CMETHD*10, PREC*10, AFMT*5 + Integer :: IDIM, IRET, ISTOPC,ITMAX,ITRACE,ML + Character*40 :: CHARBUF + INTEGER :: IARGC, NPROW, NPCOL, MYPROW, MYPCOL + EXTERNAL IARGC + INTEGER :: INTBUF(10), IP + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + IF (MYPROW==0) THEN + READ(*,*) IP + IF (IP.GE.3) THEN + READ(*,*) CMETHD + READ(*,*) PREC + READ(*,*) AFMT + + ! Convert strings in array + DO I = 1, LEN(CMETHD) + INTBUF(I) = IACHAR(CMETHD(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(PREC) + INTBUF(I) = IACHAR(PREC(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + DO I = 1, LEN(AFMT) + INTBUF(I) = IACHAR(AFMT(I:I)) + END DO + ! Broadcast parameters to all processors + CALL IGEBS2D(ICONTXT,'ALL',' ',10,1,INTBUF,10) + + READ(*,*) IDIM + IF (IP.GE.4) THEN + READ(*,*) ISTOPC + ELSE + ISTOPC=1 + ENDIF + IF (IP.GE.5) THEN + READ(*,*) ITMAX + ELSE + ITMAX=500 + ENDIF + IF (IP.GE.6) THEN + READ(*,*) ITRACE + ELSE + ITRACE=-1 + ENDIF + IF (IP.GE.7) THEN + READ(*,*) ML + ELSE + ML=1 + ENDIF + ! Broadcast parameters to all processors + + INTBUF(1) = IDIM + INTBUF(2) = ISTOPC + INTBUF(3) = ITMAX + INTBUF(4) = ITRACE + INTBUF(5) = ML + CALL IGEBS2D(ICONTXT,'ALL',' ',5,1,INTBUF,5) + + WRITE(6,*)'Solving matrix: ELL1' + WRITE(6,*)'on grid',IDIM,'x',IDIM,'x',IDIM + WRITE(6,*)' with BLOCK data distribution, NP=',Np,& + & ' Preconditioner=',PREC,& + & ' Iterative methd=',CMETHD + ELSE + ! Wrong number of parameter, print an error message and exit + CALL PR_USAGE(0) + CALL BLACS_ABORT(ICONTXT,-1) + STOP 1 + ENDIF + ELSE + ! Receive Parameters + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + CMETHD(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 10 + PREC(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',10,1,INTBUF,10,0,0) + DO I = 1, 5 + AFMT(I:I) = ACHAR(INTBUF(I)) + END DO + CALL IGEBR2D(ICONTXT,'ALL',' ',5,1,INTBUF,5,0,0) + IDIM = INTBUF(1) + ISTOPC = INTBUF(2) + ITMAX = INTBUF(3) + ITRACE = INTBUF(4) + ML = INTBUF(5) + END IF + RETURN + + END SUBROUTINE GET_PARMS + ! + ! Print an error message + ! + SUBROUTINE PR_USAGE(IOUT) + INTEGER :: IOUT + WRITE(IOUT,*)'Incorrect parameter(s) found' + WRITE(IOUT,*)' Usage: pde90 methd prec dim & + &[istop itmax itrace]' + WRITE(IOUT,*)' Where:' + WRITE(IOUT,*)' methd: CGSTAB TFQMR CGS' + WRITE(IOUT,*)' prec : ILU DIAGSC NONE' + WRITE(IOUT,*)' dim number of points along each axis' + WRITE(IOUT,*)' the size of the resulting linear ' + WRITE(IOUT,*)' system is dim**3' + WRITE(IOUT,*)' istop Stopping criterion 1, 2 or 3 [1] ' + WRITE(IOUT,*)' itmax Maximum number of iterations [500] ' + WRITE(IOUT,*)' itrace 0 (no tracing, default) or ' + WRITE(IOUT,*)' >= 0 do tracing every ITRACE' + WRITE(IOUT,*)' iterations ' + END SUBROUTINE PR_USAGE + +! +! Subroutine to allocate and fill in the coefficient matrix and +! the RHS. +! + SUBROUTINE CREATE_MATRIX(IDIM,A,B,T,DESC_A,PARTS,ICONTXT,AFMT) + ! + ! Discretize the partial diferential equation + ! + ! b1 dd(u) b2 dd(u) b3 dd(u) a1 d(u) a2 d(u) a3 d(u) + ! - ------ - ------ - ------ - ----- - ------ - ------ + a4 u + ! dxdx dydy dzdz dx dy dz + ! + ! = 0 + ! + ! boundary condition: Dirichlet + ! 0< x,y,z<1 + ! + ! u(x,y,z)(2b1+2b2+2b3+a1+a2+a3)+u(x-1,y,z)(-b1-a1)+u(x,y-1,z)(-b2-a2)+ + ! + u(x,y,z-1)(-b3-a3)-u(x+1,y,z)b1-u(x,y+1,z)b2-u(x,y,z+1)b3 + + USE TYPESP + USE TYPEDESC + USE F90TOOLS + USE F90METHD + Implicit None + INTEGER :: IDIM + integer, parameter :: nbmax=10 + Real(Kind(1.D0)),Pointer :: B(:),T(:) + Type (desc_type) :: DESC_A + Integer :: ICONTXT + INTERFACE + ! .....user passed subroutine..... + SUBROUTINE PARTS(GLOBAL_INDX,N,NP,PV,NV) + IMPLICIT NONE + INTEGER, INTENT(IN) :: GLOBAL_INDX, N, NP + INTEGER, INTENT(OUT) :: NV + INTEGER, INTENT(OUT) :: PV(*) + END SUBROUTINE PARTS + END INTERFACE ! Local variables + Type(D_SPMAT) :: A + Real(Kind(1.d0)) :: ZT(NBMAX),GLOB_X,GLOB_Y,GLOB_Z + Integer :: M,N,NNZ,GLOB_ROW,J + Type (D_SPMAT) :: ROW_MAT + Integer :: X,Y,Z,COUNTER,IA,I,INDX_OWNER + INTEGER :: NPROW,NPCOL,MYPROW,MYPCOL + Integer :: ELEMENT + INTEGER :: INFO, NV, INV + INTEGER, ALLOCATABLE :: PRV(:) + INTEGER, pointer :: ierrv(:) + Real(Kind(1.d0)), pointer :: DWORK(:) + INTEGER,POINTER :: IWORK(:) + character :: afmt*5 + ! deltah dimension of each grid cell + ! deltat discretization time + Real(Kind(1.D0)) :: DELTAH + Real(Kind(1.d0)),Parameter :: RHS=0.d0,ONE=1.d0,ZERO=0.d0 + Real(Kind(1.d0)) :: MPI_WTIME, T1, T2, T3, TINS + Real(Kind(1.d0)) :: a1, a2, a3, a4, b1, b2, b3 + external mpi_wtime,a1, a2, a3, a4, b1, b2, b3 + integer :: nb, ir1, ir2, ipr + logical :: own + ! common area + + + CALL BLACS_GRIDINFO(ICONTXT, NPROW, NPCOL, MYPROW, MYPCOL) + + DELTAH = 1.D0/(IDIM-1) + + ! Initialize array descriptor and sparse matrix storage. Provide an + ! estimate of the number of non zeroes + CALL SETERR(2) + allocate(ierrv(6)) + + ierrv(:) = 0 + M = IDIM*IDIM*IDIM + N = M + NNZ = ((N*9)/(NPROW*NPCOL)) + write(*,*) 'Size: n ',n + Call F90_PSDSCALL(N,N,PARTS,ICONTXT,IERRV,DESC_A) + write(*,*) 'Allocating A : nnz',nnz + Call F90_PSSPALL(A,IERRV,DESC_A,NNZ=NNZ) + ! Define RHS from boundary conditions; also build initial guess + write(*,*) 'Allocating B' + Call F90_PSDSALL(N,B,IERRV,DESC_A) + write(*,*) 'Allocating T' + Call F90_PSDSALL(N,T,IERRV,DESC_A) + + ! We build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. Might be extended to generate + ! a bunch of rows per call. + ! + ROW_MAT%DESCRA(1:1) = 'G' + ROW_MAT%FIDA = 'CSR' + write(*,*) 'Allocating ROW_MAT',20*nbmax + ALLOCATE(ROW_MAT%ASPK(20*nbmax),ROW_MAT%IA1(20*nbmax),& + &ROW_MAT%IA2(20*nbmax),PRV(NPROW),stat=info) + if (info.ne.0 ) then + write(*,*) 'Memory allocation error' + call blacs_abort(icontxt,-1) + endif + + TINS = 0.D0 + CALL BLACS_BARRIER(ICONTXT,'ALL') + T1 = MPI_WTIME() + + ! Loop over rows belonging to current process in a BLOCK + ! distribution. + + ROW_MAT%IA2(1)=1 + DO GLOB_ROW = 1, N + CALL PARTS(GLOB_ROW,N,NPROW,PRV,NV) + DO INV = 1, NV + INDX_OWNER = PRV(INV) + IF (INDX_OWNER == MYPROW) THEN + ! Local matrix pointer + ELEMENT=1 + ! Compute gridpoint Coordinates + IF (MOD(GLOB_ROW,(IDIM*IDIM)).EQ.0) THEN + X = GLOB_ROW/(IDIM*IDIM) + ELSE + X = GLOB_ROW/(IDIM*IDIM)+1 + ENDIF + IF (MOD((GLOB_ROW-(X-1)*IDIM*IDIM),IDIM).EQ.0) THEN + Y = (GLOB_ROW-(X-1)*IDIM*IDIM)/IDIM + ELSE + Y = (GLOB_ROW-(X-1)*IDIM*IDIM)/IDIM+1 + ENDIF + Z = GLOB_ROW-(X-1)*IDIM*IDIM-(Y-1)*IDIM + ! GLOB_X, GLOB_Y, GLOB_X coordinates + GLOB_X=X*DELTAH + GLOB_Y=Y*DELTAH + GLOB_Z=Z*DELTAH + + + ! Check on boundary points +!!$ IF (X.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Y.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Z.EQ.1) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (X.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Y.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE IF (Z.EQ.IDIM) THEN +!!$ ROW_MAT%ASPK(ELEMENT)=ONE +!!$ ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) +!!$ ELEMENT=ELEMENT+1 +!!$ ELSE + zt(1) = 0.d0 + ! Internal point: build discretization + ! + ! Term depending on (x-1,y,z) + ! + if (x==1) then + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z)& + & -A1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-2)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y-1,z) + if (y==1) then + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z)& + & -A2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-2)*IDIM+(Z) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y,z-1) + if (z==1) then + ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& + & -A3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B3(GLOB_X,GLOB_Y,GLOB_Z)& + & -A3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z-1) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y,z) + ROW_MAT%ASPK(ELEMENT)=2*B1(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B2(GLOB_X,GLOB_Y,GLOB_Z)& + & +2*B3(GLOB_X,GLOB_Y,GLOB_Z)& + & +A1(GLOB_X,GLOB_Y,GLOB_Z)& + & +A2(GLOB_X,GLOB_Y,GLOB_Z)& + & +A3(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z) + ELEMENT=ELEMENT+1 + ! Term depending on (x,y,z+1) + if (z==idim) then + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B1(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y-1)*IDIM+(Z+1) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x,y+1,z) + if (y==idim) then + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ZT(1) = EXP(-GLOB_Y**2-GLOB_Z**2)*EXP(-GLOB_X)*(-ROW_MAT%ASPK(ELEMENT)) + else + ROW_MAT%ASPK(ELEMENT)=-B2(GLOB_X,GLOB_Y,GLOB_Z) + ROW_MAT%ASPK(ELEMENT) = ROW_MAT%ASPK(ELEMENT)/(DELTAH*& + & DELTAH) + ROW_MAT%IA1(ELEMENT)=(X-1)*IDIM*IDIM+(Y)*IDIM+(Z) + ELEMENT=ELEMENT+1 + endif + ! Term depending on (x+1,y,z) + if (x